1. Hypothesis: PSC is associated with the gut microbiome changes.

Pre_LTx vs Post_LTx vs Healthy analysis on merged data

  • Alpha diversity –> group effect, country effect, interaction effect

  • Beta diversity – PERMANOVA, PCA -> group effect, country effect, interaction effect

  • DAA ->

    • o Group effect – linDA + MaAsLin2 intersection

    • o Country effect – linDA + MaAsLin2 union

Pre_ltx vs Healthy – by this comparison, we will find the effect of disease, liver cirrhosis and overall bad clinical condition

Pre_ltx vs post_ltx – by this comparison, we will find the effect of transplantation + general improvement of the clinical state

Post_ltx vs Healthy – by this comparison, we will find if the transplantation leads to „healthy microbiome”

TO DO —- Hopefully, we will find that all of these groups are somehow significantly different.

Regarding Post_LTx vs Healthy analysis, we can assume, that the differentially abundant taxa between these two groups remain because of the persistent gut MB alteration due to the PSC disease. Also, transplantation itself could add to this difference, as it has definitely strong impact on microbial composition.

Regarding Pre_LTx vs Healthy analysis, we can assume, that the differentially abundant taxa between these two groups are caused by PSC disease and other factors, like liver cirrhosis and overall bad clinical condition. To get only PSC associated taxa, we can intersect differentially abundant taxa from these aforementioned analyses (see diagram below).

imagename
imagename

TO DO: IMPORTOVAT TABULKU 1

source("custom_functions.R")

Data Import

Importing ASV, taxa and metadata tables for both Czech and Norway samples.

Czech

path = "../../data/analysis_ready_data/ikem/"
asv_tab_ikem <- as.data.frame(fread(file.path(path,"asv_table_ikem.csv"),
                                    check.names = FALSE))
taxa_tab_ikem <- as.data.frame(fread(file.path(path,"taxa_table_ikem.csv"),
                                     check.names = FALSE))
metadata_ikem <- as.data.frame(fread(file.path(path,"metadata_ikem.csv"),
                                     check.names = FALSE))

Spliting to segments

TO DO: STATISTICS OF READS

Merging two countries based on the different matrices - Ileum, Colon.

Terminal ileum

ileum_data <- merging_data(asv_tab_1=asv_tab_ikem,
                           asv_tab_2=NULL,
                           taxa_tab_1=taxa_tab_ikem,
                           taxa_tab_2=NULL,
                           metadata_1=metadata_ikem,
                           metadata_2=NULL,
                           segment="TI",Q="Q1")
## Removing 1498 ASV(s)
ileum_asv_tab <- ileum_data[[1]]
ileum_taxa_tab <- ileum_data[[2]]
ileum_metadata <- ileum_data[[3]]

Colon

colon_data <- merging_data(asv_tab_1=asv_tab_ikem,
                           asv_tab_2=NULL,
                           taxa_tab_1=taxa_tab_ikem,
                           taxa_tab_2=NULL,
                           metadata_1=metadata_ikem,
                           metadata_2=NULL,
                           segment="colon",Q="Q1")
## Removing 739 ASV(s)
colon_asv_tab <- colon_data[[1]]
colon_taxa_tab <- colon_data[[2]]
colon_metadata <- colon_data[[3]]

Data Analysis - Terminal ileum

segment="terminal_ileum"

Filtering

Rules: - prevalence > 5% (per group) - nearZeroVar with default settings - sequencing depth > 5000 - taxonomic assignment at least order

Library size

read_counts(ileum_asv_tab, line = c(5000,10000))
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.

Sequencing depth

data_filt <- seq_depth_filtering(ileum_asv_tab,
                                 ileum_taxa_tab,
                                 ileum_metadata,
                                 seq_depth_threshold = 10000)
## Removing 73 ASV(s)
filt_ileum_asv_tab <- data_filt[[1]]; alpha_ileum_asv_tab <- filt_ileum_asv_tab
filt_ileum_taxa_tab <- data_filt[[2]]; alpha_ileum_taxa_tab <- filt_ileum_taxa_tab
filt_ileum_metadata <- data_filt[[3]]; alpha_ileum_metadata <- filt_ileum_metadata

seq_step <- dim(filt_ileum_asv_tab)[1]

Library size

read_counts(filt_ileum_asv_tab,line = c(5000,10000))
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.

NearZeroVar

data_filt <- nearzerovar_filtering(filt_ileum_asv_tab, 
                                   filt_ileum_taxa_tab,
                                   filt_ileum_metadata)

filt_ileum_asv_tab <- data_filt[[1]]
filt_ileum_taxa_tab <- data_filt[[2]]
nearzero_step <- dim(filt_ileum_asv_tab)[1]

Library size

read_counts(filt_ileum_asv_tab,line = c(5000,10000))
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.

Final Counts

final_counts_filtering(ileum_asv_tab,
                       filt_ileum_asv_tab,
                       filt_ileum_metadata,
                       seq_step, 0, nearzero_step) %>% `colnames<-`("Count")

Alpha diversity

path = "../results/Q1_czech/alpha_diversity"

Calculation

# Construct MPSE object
alpha_ileum_metadata$Sample <- alpha_ileum_metadata$SampleID
ileum_mpse <- as.MPSE(construct_phyloseq(alpha_ileum_asv_tab,
                                         alpha_ileum_taxa_tab,
                                         alpha_ileum_metadata))

ileum_mpse %<>% mp_rrarefy(raresize = 10000,seed = 123)

# Calculate alpha diversity - rarefied counts
ileum_mpse %<>% mp_cal_alpha(.abundance=RareAbundance, force=TRUE)
alpha_div_plots <- list()

# preparing data frame
alpha_data <- data.frame(SampleID=ileum_mpse$Sample.x,
                         Observe=ileum_mpse$Observe,
                         Shannon=ileum_mpse$Shannon,
                         Simpson=ileum_mpse$Simpson,
                         Pielou=ileum_mpse$Pielou,
                         Group=ileum_mpse$Group,
                         Country=ileum_mpse$Country)

write.csv(alpha_data,file.path(path,paste0("alpha_indices_",segment,".csv")),
          row.names = FALSE)

Plots

Custom plot

alpha_data <- alpha_data %>% 
  dplyr::select(-c("Simpson","Pielou")) %>%
  mutate(Richness=Observe)

p_A <- alpha_diversity_custom_2(alpha_data,
                                size = 1.5,
                                width = 0.3)
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.
# save the results
alpha_div_plots[[paste(segment,"Custom")]] <- p_A

p_A

Linear Model

path = "../results/Q1_czech/alpha_diversity"
alpha_data <- read.csv(file.path(path,paste0("alpha_indices_",segment,".csv")))

Richness

results_model <- pairwise.lm(formula = "Observe ~ Group",
                             factors=alpha_data$Group,
                             data=alpha_data)

# check interaction
if (!is.data.frame(results_model)){
  results_model_observe <- results_model[[1]]
  results_model_observe_emeans <- results_model[[2]]
} else {
  results_model_observe <- results_model
  results_model_observe_emeans <- NA
}

# save the results
pc_observed <- list(); 
pc_observed[[segment]] <- results_model_observe
# see the results
knitr::kable(results_model_observe,digits = 3,
caption = "Raw results of linear model of richness estimation.")
Raw results of linear model of richness estimation.
Estimate Std..Error t.value Pr…t.. p.adj sig
healthy vs Grouppre_ltx -53.267 12.976 -4.105 0.000 0.000 ***
pre_ltx vs Grouppost_ltx 25.120 11.755 2.137 0.035 0.035 *
healthy vs Grouppost_ltx -28.147 9.269 -3.037 0.003 0.004 **
# see the results
knitr::kable(results_model[[3]],digits = 3,
caption = "Group means")
Group means
Estimate Std. Error t value Pr(>|t|)
healthy 167.892 7.940 21.145 0
pre_ltx 114.625 10.576 10.838 0
healthy 167.892 8.139 20.628 0

Shannon

results_model <- pairwise.lm(formula = "Shannon ~ Group",
                             factors=alpha_data$Group,
                             data=alpha_data)

# check interaction
if (!is.data.frame(results_model)){
  results_model_shannon <- results_model[[1]]
  results_model_shannon_emeans <- results_model[[2]]
} else {
  results_model_shannon <- results_model
  results_model_shannon_emeans <- NA
}

# save the results
pc_shannon <- list(); 
pc_shannon[[segment]] <- as.data.frame(results_model_shannon)
# see the results
knitr::kable(results_model_shannon,digits = 3,
caption = "Raw results of linear model of Shannon estimation.")
Raw results of linear model of Shannon estimation.
Estimate Std..Error t.value Pr…t.. p.adj sig
healthy vs Grouppre_ltx -0.506 0.175 -2.894 0.005 0.016 *
pre_ltx vs Grouppost_ltx 0.372 0.147 2.521 0.013 0.019 *
healthy vs Grouppost_ltx -0.134 0.110 -1.217 0.226 0.226
# see the results
knitr::kable(results_model[[3]],digits = 3,
caption = "Group means")
Group means
Estimate Std. Error t value Pr(>|t|)
healthy 3.600 0.094 38.165 0
pre_ltx 3.094 0.133 23.320 0
healthy 3.600 0.110 32.844 0

Simpson

results_model <- pairwise.lm(formula = "Simpson ~ Group",
                                     factors=alpha_data$Group,
                                     data=alpha_data)

# check interaction
if (!is.data.frame(results_model)){
  results_model_simpson <- results_model[[1]]
  results_model_simpson_emeans <- results_model[[2]]
} else {
  results_model_simpson <- results_model
  results_model_simpson_emeans <- NA
}


# save the results
pc_simpson <- list(); 
pc_simpson[[segment]] <- as.data.frame(results_model_simpson)
# see the results
knitr::kable(results_model_simpson,digits = 3,
caption = "Raw results of linear model of Simpson estimation.")
Raw results of linear model of Simpson estimation.
Estimate Std..Error t.value Pr…t.. p.adj sig
healthy vs Grouppre_ltx -0.047 0.020 -2.339 0.023 0.068
pre_ltx vs Grouppost_ltx 0.037 0.021 1.776 0.078 0.117
healthy vs Grouppost_ltx -0.009 0.016 -0.577 0.565 0.565
# see the results
knitr::kable(results_model[[3]],digits = 3,
caption = "Group means")
Group means
Estimate Std. Error t value Pr(>|t|)
healthy 0.934 0.014 68.163 0
pre_ltx 0.887 0.019 46.712 0
healthy 0.934 0.013 74.536 0

Pielou

results_model <- pairwise.lm(formula = "Pielou ~ Group",
                                     factors=alpha_data$Group,
                                     data=alpha_data)

# check interaction

if (!is.data.frame(results_model)){
  results_model_pielou <- results_model[[1]]
  results_model_pielou_emeans <- results_model[[2]]
} else {
  results_model_pielou <- results_model
  results_model_pielou_emeans <- NA
}

# save the results
pc_pielou <- list(); 
pc_pielou[[segment]] <- as.data.frame(results_model_pielou)
# see the results
knitr::kable(results_model_pielou,digits = 3,
caption = "Raw results of linear model of Pielou estimation.")
Raw results of linear model of Pielou estimation.
Estimate Std..Error t.value Pr…t.. p.adj sig
healthy vs Grouppre_ltx -0.042 0.025 -1.671 0.100 0.150
pre_ltx vs Grouppost_ltx 0.043 0.021 2.012 0.046 0.139
healthy vs Grouppost_ltx 0.001 0.017 0.079 0.937 0.937
# see the results
knitr::kable(results_model[[3]],digits = 3,
caption = "Group means")
Group means
Estimate Std. Error t value Pr(>|t|)
healthy 0.706 0.014 48.736 0
pre_ltx 0.664 0.019 34.587 0
healthy 0.706 0.016 45.174 0

Saving results

alpha_list <- list(
  Richness=pc_observed[[segment]] %>% rownames_to_column("Comparison"),
  Shannon=pc_shannon[[segment]] %>% rownames_to_column("Comparison"),
  Simpson=pc_simpson[[segment]] %>% rownames_to_column("Comparison"),
  Pielou=pc_pielou[[segment]] %>% rownames_to_column("Comparison"))
                   
write.xlsx(alpha_list, 
           file = file.path(path,paste0("alpha_diversity_results_",segment,".xlsx")))

Beta diversity

Calculating Aitchison distance (euclidean distance on clr-transformed data), both at ASV and genus level.

Main analysis - Genus, Aitchison

Genus level, Aitchison distance

level="genus"
path = "../results/Q1_czech/beta_diversity"
pairwise_aitchison_raw <- list()
pca_plots_list <- list()

Aggregation, filtering

# Aggregation
genus_data <- aggregate_taxa(ileum_asv_tab,
                             ileum_taxa_tab,
                             taxonomic_level=level,
                             names=TRUE)
# Filtration
filt_data <- filtering_steps(genus_data[[1]],
                             genus_data[[2]],
                             ileum_metadata,
                             seq_depth_threshold=10000)
## Removing 3 ASV(s)
filt_ileum_genus_tab <- filt_data[[1]]
filt_ileum_genus_taxa <- filt_data[[2]]
filt_ileum_metadata_genus <- filt_data[[3]]
PERMANOVA
pairwise_df <- filt_ileum_genus_tab %>% column_to_rownames("SeqID") %>% t()

# main effect
pp_main <- pairwise.adonis(pairwise_df,filt_ileum_metadata_genus$Group,
                           covariate = NULL, 
                           sim.method = "robust.aitchison", p.adjust.m="BH")


# tidy the results
pp_factor <- pp_main

cols <- c("pairs","Df","SumsOfSqs", "F.Model","R2","p.value", "p.adjusted", "sig")
colnames(pp_factor) <- cols

# save raw results
pairwise_aitchison_raw[[paste(level, segment)]] <- pp_factor
# see the results
knitr::kable(pp_factor,digits = 3,caption = "PERMANOVA, GROUP separation")
PERMANOVA, GROUP separation
pairs Df SumsOfSqs F.Model R2 p.value p.adjusted sig
pre_ltx vs healthy 1 285.032 1.705 0.028 0.002 0.003 **
pre_ltx vs post_ltx 1 192.724 1.059 0.008 0.305 0.305
post_ltx vs healthy 1 488.822 2.712 0.019 0.001 0.003 **
Plots

Custom

p <- pca_plot_custom(filt_ileum_genus_tab,
                                 filt_ileum_genus_taxa,
                                 filt_ileum_metadata_genus,
                                 show_boxplots = TRUE,
                                 variable = "Group", size=3, show_legend=TRUE)

# save the results
pca_plots_list[[paste(segment,level,"custom")]] <- p

# see the results
p

Saving results

write.xlsx(pairwise_aitchison_raw[[paste(level, segment)]], 
           file = file.path(path,
           paste0("beta_diversity_results_", segment,".xlsx")))

Supplementary analysis

supplements_beta <- list()

Genus level

level="genus"
Bray-Curtis

PERMANOVA

pairwise_df <- filt_ileum_genus_tab %>% column_to_rownames("SeqID") %>% t()

# main effect
pp_main <- pairwise.adonis(pairwise_df,filt_ileum_metadata_genus$Group,
                           covariate = NULL, 
                           sim.method = "bray", p.adjust.m="BH")


# tidy the results
pp_factor <- pp_main

cols <- c("pairs","Df","SumsOfSqs", "F.Model","R2","p.value", "p.adjusted", "sig")
colnames(pp_factor) <- cols

# save raw results
supplements_beta[[paste("bray",level,segment)]] <- pp_factor
# see the results
knitr::kable(pp_factor,digits = 3,caption = "PERMANOVA, GROUP separation")
PERMANOVA, GROUP separation
pairs Df SumsOfSqs F.Model R2 p.value p.adjusted sig
pre_ltx vs healthy 1 0.412 2.198 0.036 0.008 0.012 *
pre_ltx vs post_ltx 1 0.230 1.052 0.008 0.350 0.350
post_ltx vs healthy 1 0.771 3.752 0.027 0.001 0.003 **

Plots

p <- pca_plot_custom(filt_ileum_genus_tab,
                                 filt_ileum_genus_taxa,
                                 filt_ileum_metadata_genus,
                                 measure = "bray",
                                 show_boxplots = TRUE,
                                 variable = "Group", size=3, show_legend=TRUE)

# save the results
supplements_beta[[paste("PCoA bray",level,segment)]] <- p

# see the results
p

Jaccard

PERMANOVA

pairwise_df <- filt_ileum_genus_tab %>% column_to_rownames("SeqID") %>% t()

# main effect
pp_main <- pairwise.adonis(pairwise_df,filt_ileum_metadata_genus$Group,
                           covariate = NULL, 
                           sim.method = "jaccard", p.adjust.m="BH")


# tidy the results
pp_factor <- pp_main

cols <- c("pairs","Df","SumsOfSqs", "F.Model","R2","p.value", "p.adjusted", "sig")
colnames(pp_factor) <- cols

# save raw results
supplements_beta[[paste("jaccard",level,segment)]] <- pp_factor
# see the results
knitr::kable(pp_factor,digits = 3,caption = "PERMANOVA, GROUP separation")
PERMANOVA, GROUP separation
pairs Df SumsOfSqs F.Model R2 p.value p.adjusted sig
pre_ltx vs healthy 1 0.499 1.835 0.030 0.005 0.007 **
pre_ltx vs post_ltx 1 0.333 1.101 0.009 0.262 0.262
post_ltx vs healthy 1 0.865 2.980 0.021 0.001 0.003 **

Plots

Custom

p <- pca_plot_custom(filt_ileum_genus_tab,
                                 filt_ileum_genus_taxa,
                                 filt_ileum_metadata_genus,
                                 measure = "jaccard",
                                 show_boxplots = TRUE,
                                 variable = "Group", size=3, show_legend=TRUE)

# save the results
supplements_beta[[paste("PCoA jaccard",level,segment)]] <- p

# see the results
p

ASV level

level="ASV"
Aitchison

PERMANOVA

# preparing data frame
pairwise_df <- filt_ileum_asv_tab %>% column_to_rownames("SeqID") %>% t()

# main effect
pp_main <- pairwise.adonis(pairwise_df,filt_ileum_metadata$Group,
                           covariate = NULL, 
                           sim.method = "robust.aitchison", p.adjust.m="BH")


# tidy the results
pp_factor <- pp_main

cols <- c("pairs","Df","SumsOfSqs", "F.Model","R2","p.value", "p.adjusted", "sig")
colnames(pp_factor) <- cols

# save raw results
supplements_beta[[paste("aitchison",level,segment)]] <- pp_factor
# see the results
knitr::kable(pp_factor,digits = 3,caption = "PERMANOVA, GROUP separation")
PERMANOVA, GROUP separation
pairs Df SumsOfSqs F.Model R2 p.value p.adjusted sig
pre_ltx vs healthy 1 484.192 1.552 0.026 0.002 0.003 **
pre_ltx vs post_ltx 1 320.395 1.130 0.009 0.121 0.121
post_ltx vs healthy 1 877.994 2.987 0.021 0.001 0.003 **

PCoA

p <- pca_plot_custom(filt_ileum_asv_tab,
                           filt_ileum_taxa_tab,
                           filt_ileum_metadata,
                           show_boxplots = TRUE,
                           variable = "Group", size=3, show_legend=TRUE)

# save the results
supplements_beta[[paste("PCoA aitchison",level,segment)]] <- p

# see the results
p

Bray-Curtis

PERMANOVA

# preparing data frame
pairwise_df <- filt_ileum_asv_tab %>% column_to_rownames("SeqID") %>% t()

# main effect
pp_main <- pairwise.adonis(pairwise_df,filt_ileum_metadata$Group,
                           covariate = NULL, 
                           sim.method = "bray", p.adjust.m="BH")


# tidy the results
pp_factor <- pp_main

cols <- c("pairs","Df","SumsOfSqs", "F.Model","R2","p.value", "p.adjusted", "sig")
colnames(pp_factor) <- cols

# save raw results
supplements_beta[[paste("bray",level,segment)]] <- pp_factor
# see the results
knitr::kable(pp_factor,digits = 3,caption = "PERMANOVA, GROUP separation")
PERMANOVA, GROUP separation
pairs Df SumsOfSqs F.Model R2 p.value p.adjusted sig
pre_ltx vs healthy 1 0.566 1.748 0.029 0.007 0.011 *
pre_ltx vs post_ltx 1 0.437 1.284 0.010 0.113 0.113
post_ltx vs healthy 1 1.212 3.689 0.026 0.001 0.003 **

PCoA

p <- pca_plot_custom(filt_ileum_asv_tab,
                     filt_ileum_taxa_tab,
                     filt_ileum_metadata,
                     measure = "bray",
                     show_boxplots = TRUE,
                     variable = "Group", size=3, show_legend=TRUE)

# save the results
supplements_beta[[paste("PCoA bray",level,segment)]] <- p

# see the results
p

Jaccard

PERMANOVA

# preparing data frame
pairwise_df <- filt_ileum_asv_tab %>% column_to_rownames("SeqID") %>% t()

# main effect
pp_main <- pairwise.adonis(pairwise_df,filt_ileum_metadata$Group,
                           covariate = NULL, 
                           sim.method = "jaccard", p.adjust.m="BH")


# tidy the results
pp_factor <- pp_main

cols <- c("pairs","Df","SumsOfSqs", "F.Model","R2","p.value", "p.adjusted", "sig")
colnames(pp_factor) <- cols

# save raw results
supplements_beta[[paste("jaccard",level,segment)]] <- pp_factor
# see the results
knitr::kable(pp_factor,digits = 3,caption = "PERMANOVA, GROUP separation")
PERMANOVA, GROUP separation
pairs Df SumsOfSqs F.Model R2 p.value p.adjusted sig
pre_ltx vs healthy 1 0.558 1.423 0.024 0.008 0.012 *
pre_ltx vs post_ltx 1 0.474 1.174 0.009 0.102 0.102
post_ltx vs healthy 1 1.027 2.597 0.019 0.001 0.003 **

PCoA

p <- pca_plot_custom(filt_ileum_asv_tab,
                     filt_ileum_taxa_tab,
                     filt_ileum_metadata,
                     measure = "jaccard",
                     show_boxplots = TRUE,
                     variable = "Group", size=3, show_legend=TRUE)

# save the results
supplements_beta[[paste("PCoA jaccard",level,segment)]] <- p

# see the results
p

Saving results

write.xlsx(supplements_beta[!grepl("PCoA",names(supplements_beta))],
           file = file.path(path,
           paste0("supplements_beta_diversity_", segment,".xlsx")))

Univariate Analysis

Main - Genus level

level="genus"
# needed paths
path = "../results/Q1_czech/univariate_analysis"
path_maaslin=file.path("../intermediate_files/maaslin/Q1_czech",level)
# variables
raw_linda_results_genus <- list();
raw_linda_results_genus[[segment]] <- list()
linda_results_genus <- list(); 
linda_results_genus[[segment]] <- list()

# country and interaction problems
list_country_union <- list()
list_intersections <- list()
list_venns <- list()
uni_statistics <- list()

# workbook for final df
wb <- createWorkbook()

# PSC effect
psc_effect <- list()

Aggregate taxa

genus_data <- aggregate_taxa(ileum_asv_tab,
                             ileum_taxa_tab,
                             taxonomic_level = level)

ileum_genus_tab <- genus_data[[1]]
ileum_genus_taxa_tab <- genus_data[[2]]

ileum_genus_asv_taxa_tab <- create_asv_taxa_table(ileum_genus_tab,
                                                  ileum_genus_taxa_tab)

pre_ltx vs healthy

group <- c("healthy","pre_ltx")
comparison_name <- paste0(group[1], " vs ",group[2])
linDA
# prepare the data
linda_data <- binomial_prep(ileum_genus_tab,
                            ileum_genus_taxa_tab,
                            ileum_metadata,
                            group, usage="linDA")
## Removing 93 ASV(s)
## Removing 8 ASV(s)
filt_ileum_uni_data <- linda_data[[1]]
filt_ileum_uni_taxa <- linda_data[[2]]
filt_ileum_uni_metadata <- linda_data[[3]]

# fit the model
linda.obj <- linda(filt_ileum_uni_data, 
                   filt_ileum_uni_metadata, 
                   formula = '~ Group')
## 0  features are filtered!
## The filtered data has  61  samples and  212  features will be tested!
## Warning in linda(filt_ileum_uni_data, filt_ileum_uni_metadata, formula = "~ Group"): Some features have less than 3 nonzero values! 
##                      They have virtually no statistical power. You may consider filtering them in the analysis!
## Imputation approach is used.
## Fit linear models ...
## Completed.
linda.output <- linda.obj$output
linda.output <- linda_renaming(linda.output, group)

# save the results
group1 <- paste0(group[1], " vs ","Group",group[2])

for (grp in c(group1)){
  raw_linda_results_genus[[segment]][[grp]] <- 
    rawlinda.df(linda.output,
                grp,
                filt_ileum_uni_data,
                filt_ileum_uni_taxa)
  
  linda_results_genus[[segment]][[grp]] <- 
    linda.df(linda.output,
             grp,
             filt_ileum_uni_data,
             filt_ileum_uni_taxa)
}
# volcano plot
volcano_1 <- volcano_plot_linda(linda.output, 
                                group1, 
                                taxa_table = filt_ileum_uni_taxa) + 
            ggtitle(comparison_name)

volcano <- ggarrange(volcano_1)
volcano

MaAsLin2
volcano1 <- volcano_plot_maaslin(fit_data,filt_ileum_uni_taxa) + 
            ggtitle(comparison_name)

volcano <- ggarrange(volcano1)
## Warning: Removed 10 rows containing missing values or values outside the scale range (`geom_text_repel()`).
volcano

Group - Intersection
intersection_results <- group_intersection(group, list_intersections, list_venns,
                                           linda.output, fit_data,
                                           raw_linda_results_genus, 
                                           segment = segment,
                                           level=level)

list_intersections <- intersection_results[[1]]
list_venns <- intersection_results[[2]]
venn <- intersection_results[[3]]

# show the results
venn

Basic statistics
uni_df <-  merge(basic_univariate_statistics(linda_data,group),
                 raw_linda_results_genus[[segment]][[group1]],
                 by="SeqID",all=TRUE)
## [1] "healthy"
## [1] "pre_ltx"
## [1] "healthy"
## [1] "pre_ltx"
uni_df[["final_sig"]] <- uni_df$SeqID %in% list_intersections[[paste(segment,level,comparison_name)]][["SeqID"]]
uni_statistics[[segment]][[paste(level,comparison_name)]] <- uni_df

# for comparison
new_name <- comparison_name
addWorksheet(wb, sheetName = new_name)
writeData(wb, sheet = new_name, uni_df, rowNames=FALSE)

pre_ltx vs post_ltx

group <- c("pre_ltx","post_ltx")
comparison_name <- paste0(group[1], " vs ",group[2])
linDA
# prepare the data
linda_data <- binomial_prep(ileum_genus_tab,
                            ileum_genus_taxa_tab,
                            ileum_metadata,
                            group, usage="linDA")
## Removing 41 ASV(s)
## Removing 3 ASV(s)
filt_ileum_uni_data <- linda_data[[1]]
filt_ileum_uni_taxa <- linda_data[[2]]
filt_ileum_uni_metadata <- linda_data[[3]]

# fit the model
linda.obj <- linda(filt_ileum_uni_data, 
                   filt_ileum_uni_metadata, 
                   formula = '~ Group')
## 0  features are filtered!
## The filtered data has  126  samples and  196  features will be tested!
## Warning in linda(filt_ileum_uni_data, filt_ileum_uni_metadata, formula = "~ Group"): Some features have less than 3 nonzero values! 
##                      They have virtually no statistical power. You may consider filtering them in the analysis!
## Imputation approach is used.
## Fit linear models ...
## Completed.
linda.output <- linda.obj$output
linda.output <- linda_renaming(linda.output, group)

# save the results
group1 <- paste0(group[1], " vs ","Group",group[2])

for (grp in c(group1)){
  raw_linda_results_genus[[segment]][[grp]] <- 
    rawlinda.df(linda.output,
                grp,
                filt_ileum_uni_data,
                filt_ileum_uni_taxa)
  
  linda_results_genus[[segment]][[grp]] <- 
    linda.df(linda.output,
             grp,
             filt_ileum_uni_data,
             filt_ileum_uni_taxa)
}
# volcano plot
volcano_1 <- volcano_plot_linda(linda.output, 
                                group1, 
                                taxa_table = filt_ileum_uni_taxa) + 
            ggtitle(comparison_name)


volcano <- ggarrange(volcano_1)
volcano

MaAsLin2
volcano1 <- volcano_plot_maaslin(fit_data,filt_ileum_uni_taxa) + 
            ggtitle(comparison_name)

volcano <- ggarrange(volcano1)
## Warning: Removed 1 row containing missing values or values outside the scale range (`geom_text_repel()`).
volcano

Group - Intersection
intersection_results <- group_intersection(group, list_intersections, list_venns,
                                           linda.output, fit_data,
                                           raw_linda_results_genus, 
                                           segment = segment,
                                           level=level)

list_intersections <- intersection_results[[1]]
list_venns <- intersection_results[[2]]
venn <- intersection_results[[3]]

# show the results
venn

Basic statistics
uni_df <-  merge(basic_univariate_statistics(linda_data,group),
                 raw_linda_results_genus[[segment]][[group1]],
                 by="SeqID",all=TRUE)
## [1] "pre_ltx"
## [1] "post_ltx"
## [1] "pre_ltx"
## [1] "post_ltx"
uni_df[["final_sig"]] <- uni_df$SeqID %in% list_intersections[[paste(segment,level,comparison_name)]][["SeqID"]]
uni_statistics[[segment]][[paste(level,comparison_name)]] <- uni_df

# for comparison
new_name <- comparison_name
addWorksheet(wb, sheetName = new_name)
writeData(wb, sheet = new_name, uni_df, rowNames=FALSE)

post_ltx vs healthy

group <- c("healthy","post_ltx")
comparison_name <- paste0(group[1], " vs ",group[2])
linDA
# prepare the data
linda_data <- binomial_prep(ileum_genus_tab,
                            ileum_genus_taxa_tab,
                            ileum_metadata,
                            group, usage="linDA")
## Removing 16 ASV(s)
## Removing 2 ASV(s)
filt_ileum_uni_data <- linda_data[[1]]
filt_ileum_uni_taxa <- linda_data[[2]]
filt_ileum_uni_metadata <- linda_data[[3]]

# fit the model
linda.obj <- linda(filt_ileum_uni_data, 
                   filt_ileum_uni_metadata, 
                   formula = '~ Group')
## 0  features are filtered!
## The filtered data has  139  samples and  194  features will be tested!
## Pseudo-count approach is used.
## Fit linear models ...
## Completed.
linda.output <- linda.obj$output
linda.output <- linda_renaming(linda.output, group)

# save the results
group1 <- paste0(group[1], " vs ","Group",group[2])

for (grp in c(group1)){
  raw_linda_results_genus[[segment]][[grp]] <- 
    rawlinda.df(linda.output,
                grp,
                filt_ileum_uni_data,
                filt_ileum_uni_taxa)
  
  linda_results_genus[[segment]][[grp]] <- 
    linda.df(linda.output,
             grp,
             filt_ileum_uni_data,
             filt_ileum_uni_taxa)
}
# volcano plot
volcano_1 <- volcano_plot_linda(linda.output, 
                                group1, 
                                taxa_table = filt_ileum_uni_taxa) + 
            ggtitle(comparison_name)

volcano <- ggarrange(volcano_1)
volcano

MaAsLin2
volcano1 <- volcano_plot_maaslin(fit_data,filt_ileum_uni_taxa) + 
            ggtitle(comparison_name)

volcano <- ggarrange(volcano1)
## Warning: Removed 22 rows containing missing values or values outside the scale range (`geom_text_repel()`).
volcano

Group - Intersection
intersection_results <- group_intersection(group, list_intersections, list_venns,
                                           linda.output, fit_data,
                                           raw_linda_results_genus, 
                                           segment = segment,
                                           level=level)

list_intersections <- intersection_results[[1]]
list_venns <- intersection_results[[2]]
venn <- intersection_results[[3]]

# show the results
venn

Basic statistics
uni_df <-  merge(basic_univariate_statistics(linda_data,group),
                 raw_linda_results_genus[[segment]][[group1]],
                 by="SeqID",all=TRUE)
## [1] "healthy"
## [1] "post_ltx"
## [1] "healthy"
## [1] "post_ltx"
uni_df[["final_sig"]] <- uni_df$SeqID %in% list_intersections[[paste(segment,level,comparison_name)]][["SeqID"]]
uni_statistics[[segment]][[paste(level,comparison_name)]] <- uni_df

# for comparison
new_name <- comparison_name
addWorksheet(wb, sheetName = new_name)
writeData(wb, sheet = new_name, uni_df, rowNames=FALSE)

Visualization

Heatmap visualizing the linDA’s logFoldChange for taxa with p < 0.1.

list_heatmap <- list_intersections[grep(paste(segment,level),
                                  names(list_intersections),value=TRUE)]

p_heatmap_linda <- heatmap_linda(list_heatmap,ileum_taxa_tab)
## Warning: The melt generic in data.table has been passed a data.frame and will attempt to redirect to the
## relevant reshape2 method; please note that reshape2 is superseded and is no longer actively developed, and
## this redirection is now deprecated. To continue using melt methods from reshape2 while both libraries are
## attached, e.g. melt.list, you can prepend the namespace, i.e. reshape2::melt(plot_df). In the next version,
## this warning will become an error.
## Warning: The melt generic in data.table has been passed a data.frame and will attempt to redirect to the
## relevant reshape2 method; please note that reshape2 is superseded and is no longer actively developed, and
## this redirection is now deprecated. To continue using melt methods from reshape2 while both libraries are
## attached, e.g. melt.list, you can prepend the namespace, i.e. reshape2::melt(plot_df). In the next version,
## this warning will become an error.
## Warning: The dcast generic in data.table has been passed a data.frame and will attempt to redirect to the
## relevant reshape2 method; please note that reshape2 is superseded and is no longer actively developed, and
## this redirection is now deprecated. Please do this redirection yourself like
## reshape2::dcast(plot_df_combined). In the next version, this warning will become an error.
p_heatmap_linda

Dot heatmap

dotheatmap_linda <- dot_heatmap_linda(list_heatmap,
                                      uni_df,
                                      ileum_taxa_tab)
## Warning: The melt generic in data.table has been passed a data.frame and will attempt to redirect to the
## relevant reshape2 method; please note that reshape2 is superseded and is no longer actively developed, and
## this redirection is now deprecated. To continue using melt methods from reshape2 while both libraries are
## attached, e.g. melt.list, you can prepend the namespace, i.e. reshape2::melt(plot_df). In the next version,
## this warning will become an error.
## Warning: The melt generic in data.table has been passed a data.frame and will attempt to redirect to the
## relevant reshape2 method; please note that reshape2 is superseded and is no longer actively developed, and
## this redirection is now deprecated. To continue using melt methods from reshape2 while both libraries are
## attached, e.g. melt.list, you can prepend the namespace, i.e. reshape2::melt(plot_df). In the next version,
## this warning will become an error.
## Warning: The melt generic in data.table has been passed a data.frame and will attempt to redirect to the
## relevant reshape2 method; please note that reshape2 is superseded and is no longer actively developed, and
## this redirection is now deprecated. To continue using melt methods from reshape2 while both libraries are
## attached, e.g. melt.list, you can prepend the namespace, i.e. reshape2::melt(plot_df). In the next version,
## this warning will become an error.
## min_clr -1.585133 
## max_clr 7.006078 
## min_log -5.092591 
## max_log 5.066079
dotheatmap_linda

PSC effect

pre_LTx vs Healthy and Post_LTx vs Healthy intersection

imagename
imagename
A <- list_intersections[[paste(segment,level,"healthy vs pre_ltx")]]$SeqID
B <- list_intersections[[paste(segment,level,"healthy vs post_ltx")]]$SeqID
C <- intersect(A,B)

psc_effect[[paste(segment,level)]] <- list_intersections[[paste(segment,level, "healthy vs pre_ltx")]][list_intersections[[paste(segment,level, "healthy vs pre_ltx")]]$SeqID %in% C,]
  
# see the results
psc_effect[[paste(segment,level)]] 

Saving results

# ALL DATA
saveWorkbook(wb,file.path(path,paste0("uni_analysis_wb_",segment,".xlsx")),
             overwrite = TRUE)

# PSC effect
write.xlsx(psc_effect[[paste(segment,level)]],file.path(path,paste0("psc_effect_",segment,".xlsx")))

# SIGNIFICANT taxa

write.xlsx(list_intersections[grepl(segment,names(list_intersections))] %>%
            `names<-`(gsub(segment, "", names(
              list_intersections[grepl(segment,names(list_intersections))]))),
           file.path(path,paste0("significant_taxa_",segment,".xlsx")))

Supplementary Analysis

supplements_uni <- list()
supplements_wb <- createWorkbook()

ASV level

level="ASV"
path_maaslin="../intermediate_files/maaslin/Q1_czech/ASV/"
raw_linda_results <- list();
raw_linda_results[[segment]] <- list()
linda_results <- list(); 
linda_results[[segment]] <- list()
pre_ltx vs healthy
group <- c("healthy","pre_ltx")
comparison_name <- paste0(group[1], " vs ",group[2])

linDA

# prepare the data
linda_data <- binomial_prep(ileum_asv_tab,
                            ileum_taxa_tab,
                            ileum_metadata,
                            group, usage="linDA")
## Removing 1478 ASV(s)
## Removing 118 ASV(s)
filt_ileum_uni_data <- linda_data[[1]]
filt_ileum_uni_taxa <- linda_data[[2]]
filt_ileum_uni_metadata <- linda_data[[3]]

# fit the model
linda.obj <- linda(filt_ileum_uni_data, 
                   filt_ileum_uni_metadata, 
                   formula = '~ Group')
## 0  features are filtered!
## The filtered data has  61  samples and  738  features will be tested!
## Warning in linda(filt_ileum_uni_data, filt_ileum_uni_metadata, formula = "~ Group"): Some features have less than 3 nonzero values! 
##                      They have virtually no statistical power. You may consider filtering them in the analysis!
## Imputation approach is used.
## Fit linear models ...
## Completed.
linda.output <- linda.obj$output
linda.output <- linda_renaming(linda.output, group)

# save the results
group1 <- paste0(group[1], " vs ","Group",group[2])

for (grp in c(group1)){
  raw_linda_results[[segment]][[grp]] <- 
    rawlinda.df(linda.output,
                grp,
                filt_ileum_uni_data,
                filt_ileum_uni_taxa)
  
  linda_results[[segment]][[grp]] <- 
    linda.df(linda.output,
             grp,
             filt_ileum_uni_data,
             filt_ileum_uni_taxa)
}
# volcano plot
volcano_1 <- volcano_plot_linda(linda.output, 
                                group1, 
                                taxa_table = filt_ileum_uni_taxa) + 
            ggtitle(comparison_name)

volcano <- ggarrange(volcano_1, ncol=3)

MaAsLin2

volcano1 <- volcano_plot_maaslin(fit_data,filt_ileum_uni_taxa) + 
            ggtitle(comparison_name)

volcano <- ggarrange(volcano1, ncol=2)
volcano

Group - Intersection

intersection_results <- group_intersection(group, list_intersections, list_venns,
                                           linda.output, fit_data,
                                           raw_linda_results, 
                                           segment = segment,
                                           level=level)

list_intersections <- intersection_results[[1]]
list_venns <- intersection_results[[2]]
venn <- intersection_results[[3]]

# show the results
venn

Basic statistics

uni_df <-  merge(basic_univariate_statistics(linda_data,group),
                 raw_linda_results[[segment]][[group1]],
                 by="SeqID",all=TRUE)
## [1] "healthy"
## [1] "pre_ltx"
## [1] "healthy"
## [1] "pre_ltx"
uni_df[["final_sig"]] <- uni_df$SeqID %in% list_intersections[[paste(segment,level,comparison_name)]][["SeqID"]]
uni_statistics[[segment]][[paste(level,comparison_name)]] <- uni_df

# for comparison
new_name <- paste(level,comparison_name)
addWorksheet(supplements_wb, sheetName = new_name)
writeData(supplements_wb, sheet = new_name, uni_df, rowNames=FALSE)
pre_ltx vs post_ltx
group <- c("pre_ltx","post_ltx")
comparison_name <- paste0(group[1], " vs ",group[2])

linDA

# prepare the data
linda_data <- binomial_prep(ileum_asv_tab,
                            ileum_taxa_tab,
                            ileum_metadata,
                            group, usage="linDA")
## Removing 555 ASV(s)
## Removing 36 ASV(s)
filt_ileum_uni_data <- linda_data[[1]]
filt_ileum_uni_taxa <- linda_data[[2]]
filt_ileum_uni_metadata <- linda_data[[3]]

# fit the model
linda.obj <- linda(filt_ileum_uni_data, 
                   filt_ileum_uni_metadata, 
                   formula = '~ Group')
## 0  features are filtered!
## The filtered data has  126  samples and  526  features will be tested!
## Warning in linda(filt_ileum_uni_data, filt_ileum_uni_metadata, formula = "~ Group"): Some features have less than 3 nonzero values! 
##                      They have virtually no statistical power. You may consider filtering them in the analysis!
## Imputation approach is used.
## Fit linear models ...
## Completed.
linda.output <- linda.obj$output
linda.output <- linda_renaming(linda.output, group)

# save the results
group1 <- paste0(group[1], " vs ","Group",group[2])

for (grp in c(group1)){
  raw_linda_results[[segment]][[grp]] <- 
    rawlinda.df(linda.output,
                grp,
                filt_ileum_uni_data,
                filt_ileum_uni_taxa)
  
  linda_results[[segment]][[grp]] <- 
    linda.df(linda.output,
             grp,
             filt_ileum_uni_data,
             filt_ileum_uni_taxa)
}
# volcano plot
volcano_1 <- volcano_plot_linda(linda.output, 
                                group1, 
                                taxa_table = filt_ileum_uni_taxa) + 
            ggtitle(comparison_name)

volcano <- ggarrange(volcano_1)

MaAsLin2

volcano1 <- volcano_plot_maaslin(fit_data,filt_ileum_uni_taxa) + 
            ggtitle(comparison_name)

volcano <- ggarrange(volcano1)
volcano

Group - Intersection

intersection_results <- group_intersection(group, list_intersections, list_venns,
                                           linda.output, fit_data,
                                           raw_linda_results, 
                                           segment = segment,
                                           level=level)

list_intersections <- intersection_results[[1]]
list_venns <- intersection_results[[2]]
venn <- intersection_results[[3]]

# show the results
venn

Basic statistics

uni_df <-  merge(basic_univariate_statistics(linda_data,group),
                 raw_linda_results[[segment]][[group1]],
                 by="SeqID",all=TRUE)
## [1] "pre_ltx"
## [1] "post_ltx"
## [1] "pre_ltx"
## [1] "post_ltx"
uni_df[["final_sig"]] <- uni_df$SeqID %in% list_intersections[[paste(segment,level,comparison_name)]][["SeqID"]]
uni_statistics[[segment]][[paste(level,comparison_name)]] <- uni_df

# for comparison
new_name <- paste(level,comparison_name)
addWorksheet(supplements_wb, sheetName = new_name)
writeData(supplements_wb, sheet = new_name, uni_df, rowNames=FALSE)
post_ltx vs healthy
group <- c("healthy","post_ltx")
comparison_name <- paste0(group[1], " vs ",group[2])

linDA

# prepare the data
linda_data <- binomial_prep(ileum_asv_tab,
                            ileum_taxa_tab,
                            ileum_metadata,
                            group, usage="linDA")
## Removing 260 ASV(s)
## Removing 48 ASV(s)
filt_ileum_uni_data <- linda_data[[1]]
filt_ileum_uni_taxa <- linda_data[[2]]
filt_ileum_uni_metadata <- linda_data[[3]]

# fit the model
linda.obj <- linda(filt_ileum_uni_data, 
                   filt_ileum_uni_metadata, 
                   formula = '~ Group')
## 0  features are filtered!
## The filtered data has  139  samples and  617  features will be tested!
## Warning in linda(filt_ileum_uni_data, filt_ileum_uni_metadata, formula = "~ Group"): Some features have less than 3 nonzero values! 
##                      They have virtually no statistical power. You may consider filtering them in the analysis!
## Pseudo-count approach is used.
## Fit linear models ...
## Completed.
linda.output <- linda.obj$output
linda.output <- linda_renaming(linda.output, group)

# save the results
group1 <- paste0(group[1], " vs ","Group",group[2])

for (grp in c(group1)){
  raw_linda_results[[segment]][[grp]] <- 
    rawlinda.df(linda.output,
                grp,
                filt_ileum_uni_data,
                filt_ileum_uni_taxa)
  
  linda_results[[segment]][[grp]] <- 
    linda.df(linda.output,
             grp,
             filt_ileum_uni_data,
             filt_ileum_uni_taxa)
}
# volcano plot
volcano_1 <- volcano_plot_linda(linda.output, 
                                group1, 
                                taxa_table = filt_ileum_uni_taxa) + 
            ggtitle(comparison_name)


volcano <- ggarrange(volcano_1)

MaAsLin2

volcano1 <- volcano_plot_maaslin(fit_data,filt_ileum_uni_taxa) + 
            ggtitle(comparison_name)


volcano <- ggarrange(volcano1)
volcano
## Warning: ggrepel: 68 unlabeled data points (too many overlaps). Consider increasing max.overlaps

Group - Intersection

intersection_results <- group_intersection(group, list_intersections, list_venns,
                                           linda.output, fit_data,
                                           raw_linda_results, 
                                           segment = segment,
                                           level=level)

list_intersections <- intersection_results[[1]]
list_venns <- intersection_results[[2]]
venn <- intersection_results[[3]]

# show the results
venn

Basic statistics

uni_df <-  merge(basic_univariate_statistics(linda_data,group),
                 raw_linda_results[[segment]][[group1]],
                 by="SeqID",all=TRUE)
## [1] "healthy"
## [1] "post_ltx"
## [1] "healthy"
## [1] "post_ltx"
uni_df[["final_sig"]] <- uni_df$SeqID %in% list_intersections[[paste(segment,level,comparison_name)]][["SeqID"]]
uni_statistics[[segment]][[paste(level,comparison_name)]] <- uni_df

# for comparison
new_name <- paste(level,comparison_name)
addWorksheet(supplements_wb, sheetName = new_name)
writeData(supplements_wb, sheet = new_name, uni_df, rowNames=FALSE)
Visualization

Heatmap visualizing the linDA’s logFoldChange for taxa with p < 0.1.

list_heatmap <- list_intersections[grep(paste(segment,level),
                                  names(list_intersections),value=TRUE)]

p_heatmap_linda <- heatmap_linda(list_heatmap,ileum_taxa_tab)
## Warning: The melt generic in data.table has been passed a data.frame and will attempt to redirect to the
## relevant reshape2 method; please note that reshape2 is superseded and is no longer actively developed, and
## this redirection is now deprecated. To continue using melt methods from reshape2 while both libraries are
## attached, e.g. melt.list, you can prepend the namespace, i.e. reshape2::melt(plot_df). In the next version,
## this warning will become an error.
## Warning: The melt generic in data.table has been passed a data.frame and will attempt to redirect to the
## relevant reshape2 method; please note that reshape2 is superseded and is no longer actively developed, and
## this redirection is now deprecated. To continue using melt methods from reshape2 while both libraries are
## attached, e.g. melt.list, you can prepend the namespace, i.e. reshape2::melt(plot_df). In the next version,
## this warning will become an error.
## Warning: The dcast generic in data.table has been passed a data.frame and will attempt to redirect to the
## relevant reshape2 method; please note that reshape2 is superseded and is no longer actively developed, and
## this redirection is now deprecated. Please do this redirection yourself like
## reshape2::dcast(plot_df_combined). In the next version, this warning will become an error.
p_heatmap_linda

Dot heatmap

dotheatmap_linda <- dot_heatmap_linda(list_heatmap,
                                      uni_df,
                                      ileum_taxa_tab)
## Warning: The melt generic in data.table has been passed a data.frame and will attempt to redirect to the
## relevant reshape2 method; please note that reshape2 is superseded and is no longer actively developed, and
## this redirection is now deprecated. To continue using melt methods from reshape2 while both libraries are
## attached, e.g. melt.list, you can prepend the namespace, i.e. reshape2::melt(plot_df). In the next version,
## this warning will become an error.
## Warning: The melt generic in data.table has been passed a data.frame and will attempt to redirect to the
## relevant reshape2 method; please note that reshape2 is superseded and is no longer actively developed, and
## this redirection is now deprecated. To continue using melt methods from reshape2 while both libraries are
## attached, e.g. melt.list, you can prepend the namespace, i.e. reshape2::melt(plot_df). In the next version,
## this warning will become an error.
## Warning: The melt generic in data.table has been passed a data.frame and will attempt to redirect to the
## relevant reshape2 method; please note that reshape2 is superseded and is no longer actively developed, and
## this redirection is now deprecated. To continue using melt methods from reshape2 while both libraries are
## attached, e.g. melt.list, you can prepend the namespace, i.e. reshape2::melt(plot_df). In the next version,
## this warning will become an error.
## min_clr -0.7209313 
## max_clr 6.386569 
## min_log -5.127704 
## max_log 4.154206
dotheatmap_linda

PSC effect

pre_LTx vs Healthy and Post_LTx vs Healthy intersection

imagename
imagename
A <- list_intersections[[paste(segment,level,"healthy vs pre_ltx")]]$SeqID
B <- list_intersections[[paste(segment,level,"healthy vs post_ltx")]]$SeqID
C <- intersect(A,B)

psc_effect[[paste(segment,level)]] <- list_intersections[[paste(segment,level, "healthy vs pre_ltx")]][list_intersections[[paste(segment,level, "healthy vs pre_ltx")]]$SeqID %in% C,]
  
# see the results
psc_effect[[paste(segment,level)]] 

Phylum level

level="phylum"
path_maaslin="../intermediate_files/maaslin/Q1_czech/Phylum/"
raw_linda_results_phylum <- list();
raw_linda_results_phylum[[segment]] <- list()
linda_results_phylum <- list(); 
linda_results_phylum[[segment]] <- list()

Aggregate taxa

phylum_data <- aggregate_taxa(ileum_asv_tab,
                             ileum_taxa_tab,
                             taxonomic_level = "Phylum")

ileum_phylum_tab <- phylum_data[[1]]
ileum_phylum_taxa_tab <- phylum_data[[2]]
pre_ltx vs healthy
group <- c("healthy","pre_ltx")
comparison_name <- paste0(group[1], " vs ",group[2])

linDA

# prepare the data
linda_data <- binomial_prep(ileum_phylum_tab,
                            ileum_phylum_taxa_tab,
                            ileum_metadata,
                            group, usage="linDA")
## Removing 2 ASV(s)
filt_ileum_uni_data <- linda_data[[1]]
filt_ileum_uni_taxa <- linda_data[[2]]
filt_ileum_uni_metadata <- linda_data[[3]]

# fit the model
linda.obj <- linda(filt_ileum_uni_data, 
                   filt_ileum_uni_metadata, 
                   formula = '~ Group')
## 0  features are filtered!
## The filtered data has  61  samples and  9  features will be tested!
## Pseudo-count approach is used.
## Fit linear models ...
## Completed.
linda.output <- linda.obj$output
linda.output <- linda_renaming(linda.output, group)

# save the results
group1 <- paste0(group[1], " vs ","Group",group[2])

for (grp in c(group1)){
  raw_linda_results_phylum[[segment]][[grp]] <- 
    rawlinda.df(linda.output,
                grp,
                filt_ileum_uni_data,
                filt_ileum_uni_taxa)
  
  linda_results_phylum[[segment]][[grp]] <- 
    linda.df(linda.output,
             grp,
             filt_ileum_uni_data,
             filt_ileum_uni_taxa)
}
# volcano plot
volcano_1 <- volcano_plot_linda(linda.output, 
                                group1, 
                                taxa_table = filt_ileum_uni_taxa) + 
            ggtitle(comparison_name)
## Using Phylum for naming
volcano <- ggarrange(volcano_1)

MaAsLin2

volcano1 <- volcano_plot_maaslin(fit_data,filt_ileum_uni_taxa) + 
            ggtitle(comparison_name)

volcano <- ggarrange(volcano1)
volcano

Group - Intersection

intersection_results <- group_intersection(group, list_intersections, list_venns,
                                           linda.output, fit_data,
                                           raw_linda_results_phylum, 
                                           segment = segment,
                                           level=level)

list_intersections <- intersection_results[[1]]
list_venns <- intersection_results[[2]]
venn <- intersection_results[[3]]

# show the results
venn

Basic statistics

uni_df <-  merge(basic_univariate_statistics(linda_data,group),
                 raw_linda_results_phylum[[segment]][[group1]],
                 by="SeqID",all=TRUE)
## [1] "healthy"
## [1] "pre_ltx"
## [1] "healthy"
## [1] "pre_ltx"
uni_df[["final_sig"]] <- uni_df$SeqID %in% list_intersections[[paste(segment,level,comparison_name)]][["SeqID"]]
uni_statistics[[segment]][[paste(level,comparison_name)]] <- uni_df

# for comparison
new_name <- paste(level,comparison_name)
addWorksheet(supplements_wb, sheetName = new_name)
writeData(supplements_wb, sheet = new_name, uni_df, rowNames=FALSE)
pre_ltx vs post_ltx
group <- c("pre_ltx","post_ltx")
comparison_name <- paste0(group[1], " vs ",group[2])

linDA

# prepare the data
linda_data <- binomial_prep(ileum_phylum_tab,
                            ileum_phylum_taxa_tab,
                            ileum_metadata,
                            group, usage="linDA")
## Removing 2 ASV(s)
filt_ileum_uni_data <- linda_data[[1]]
filt_ileum_uni_taxa <- linda_data[[2]]
filt_ileum_uni_metadata <- linda_data[[3]]

# fit the model
linda.obj <- linda(filt_ileum_uni_data, 
                   filt_ileum_uni_metadata, 
                   formula = '~ Group')
## 0  features are filtered!
## The filtered data has  126  samples and  10  features will be tested!
## Imputation approach is used.
## Fit linear models ...
## Completed.
linda.output <- linda.obj$output
linda.output <- linda_renaming(linda.output, group)

# save the results
group1 <- paste0(group[1], " vs ","Group",group[2])

for (grp in c(group1)){
  raw_linda_results_phylum[[segment]][[grp]] <- 
    rawlinda.df(linda.output,
                grp,
                filt_ileum_uni_data,
                filt_ileum_uni_taxa)
  
  linda_results_phylum[[segment]][[grp]] <- 
    linda.df(linda.output,
             grp,
             filt_ileum_uni_data,
             filt_ileum_uni_taxa)
}
# volcano plot
volcano_1 <- volcano_plot_linda(linda.output, 
                                group1, 
                                taxa_table = filt_ileum_uni_taxa) + 
            ggtitle(comparison_name)
## Using Phylum for naming
volcano <- ggarrange(volcano_1)
volcano

MaAsLin2

volcano1 <- volcano_plot_maaslin(fit_data,filt_ileum_uni_taxa) + 
            ggtitle(comparison_name)

volcano <- ggarrange(volcano1)
volcano

Group - Intersection

intersection_results <- group_intersection(group, list_intersections, list_venns,
                                           linda.output, fit_data,
                                           raw_linda_results_phylum, 
                                           segment = segment,
                                           level=level)

list_intersections <- intersection_results[[1]]
list_venns <- intersection_results[[2]]
venn <- intersection_results[[3]]

# show the results
venn

Basic statistics

uni_df <-  merge(basic_univariate_statistics(linda_data,group),
                 raw_linda_results_phylum[[segment]][[group1]],
                 by="SeqID",all=TRUE)
## [1] "pre_ltx"
## [1] "post_ltx"
## [1] "pre_ltx"
## [1] "post_ltx"
uni_df[["final_sig"]] <- uni_df$SeqID %in% list_intersections[[paste(segment,level,comparison_name)]][["SeqID"]]
uni_statistics[[segment]][[paste(level,comparison_name)]] <- uni_df

# for comparison
new_name <- paste(level,comparison_name)
addWorksheet(supplements_wb, sheetName = new_name)
writeData(supplements_wb, sheet = new_name, uni_df, rowNames=FALSE)
post_ltx vs healthy
group <- c("healthy","post_ltx")
comparison_name <- paste0(group[1], " vs ",group[2])

linDA

# prepare the data
linda_data <- binomial_prep(ileum_phylum_tab,
                            ileum_phylum_taxa_tab,
                            ileum_metadata,
                            group, usage="linDA")

filt_ileum_uni_data <- linda_data[[1]]
filt_ileum_uni_taxa <- linda_data[[2]]
filt_ileum_uni_metadata <- linda_data[[3]]

# fit the model
linda.obj <- linda(filt_ileum_uni_data, 
                   filt_ileum_uni_metadata, 
                   formula = '~ Group')
## 0  features are filtered!
## The filtered data has  139  samples and  10  features will be tested!
## Imputation approach is used.
## Fit linear models ...
## Completed.
linda.output <- linda.obj$output
linda.output <- linda_renaming(linda.output, group)

# save the results
group1 <- paste0(group[1], " vs ","Group",group[2])

for (grp in c(group1)){
  raw_linda_results_phylum[[segment]][[grp]] <- 
    rawlinda.df(linda.output,
                grp,
                filt_ileum_uni_data,
                filt_ileum_uni_taxa)
  
  linda_results_phylum[[segment]][[grp]] <- 
    linda.df(linda.output,
             grp,
             filt_ileum_uni_data,
             filt_ileum_uni_taxa)
}
# volcano plot
volcano_1 <- volcano_plot_linda(linda.output, 
                                group1, 
                                taxa_table = filt_ileum_uni_taxa) + 
            ggtitle(comparison_name)
## Using Phylum for naming
volcano <- ggarrange(volcano_1)
volcano

MaAsLin2

volcano1 <- volcano_plot_maaslin(fit_data,filt_ileum_uni_taxa) + 
            ggtitle(comparison_name)

volcano <- ggarrange(volcano1)
volcano

Group - Intersection

intersection_results <- group_intersection(group, list_intersections, list_venns,
                                           linda.output, fit_data,
                                           raw_linda_results_phylum, 
                                           segment = segment,
                                           level=level)

list_intersections <- intersection_results[[1]]
list_venns <- intersection_results[[2]]
venn <- intersection_results[[3]]

# show the results
venn

Basic statistics

uni_df <-  merge(basic_univariate_statistics(linda_data,group),
                 raw_linda_results_phylum[[segment]][[group1]],
                 by="SeqID",all=TRUE)
## [1] "healthy"
## [1] "post_ltx"
## [1] "healthy"
## [1] "post_ltx"
uni_df[["final_sig"]] <- uni_df$SeqID %in% list_intersections[[paste(segment,level,comparison_name)]][["SeqID"]]
uni_statistics[[segment]][[paste(level,comparison_name)]] <- uni_df

# for comparison
new_name <- paste(level,comparison_name)
addWorksheet(supplements_wb, sheetName = new_name)
writeData(supplements_wb, sheet = new_name, uni_df, rowNames=FALSE)
Visualization

Heatmap visualizing the linDA’s logFoldChange for taxa with p < 0.1.

list_heatmap <- list_intersections[grep(paste(segment,level),
                                  names(list_intersections),value=TRUE)]

p_heatmap_linda <- heatmap_linda(list_heatmap,ileum_taxa_tab)
## Warning: The melt generic in data.table has been passed a data.frame and will attempt to redirect to the
## relevant reshape2 method; please note that reshape2 is superseded and is no longer actively developed, and
## this redirection is now deprecated. To continue using melt methods from reshape2 while both libraries are
## attached, e.g. melt.list, you can prepend the namespace, i.e. reshape2::melt(plot_df). In the next version,
## this warning will become an error.
## Warning: The melt generic in data.table has been passed a data.frame and will attempt to redirect to the
## relevant reshape2 method; please note that reshape2 is superseded and is no longer actively developed, and
## this redirection is now deprecated. To continue using melt methods from reshape2 while both libraries are
## attached, e.g. melt.list, you can prepend the namespace, i.e. reshape2::melt(plot_df). In the next version,
## this warning will become an error.
## Warning: The dcast generic in data.table has been passed a data.frame and will attempt to redirect to the
## relevant reshape2 method; please note that reshape2 is superseded and is no longer actively developed, and
## this redirection is now deprecated. Please do this redirection yourself like
## reshape2::dcast(plot_df_combined). In the next version, this warning will become an error.
p_heatmap_linda

Dot heatmap

dotheatmap_linda <- dot_heatmap_linda(list_heatmap,
                                      uni_df,
                                      ileum_taxa_tab)
## Warning: The melt generic in data.table has been passed a data.frame and will attempt to redirect to the
## relevant reshape2 method; please note that reshape2 is superseded and is no longer actively developed, and
## this redirection is now deprecated. To continue using melt methods from reshape2 while both libraries are
## attached, e.g. melt.list, you can prepend the namespace, i.e. reshape2::melt(plot_df). In the next version,
## this warning will become an error.
## Warning: The melt generic in data.table has been passed a data.frame and will attempt to redirect to the
## relevant reshape2 method; please note that reshape2 is superseded and is no longer actively developed, and
## this redirection is now deprecated. To continue using melt methods from reshape2 while both libraries are
## attached, e.g. melt.list, you can prepend the namespace, i.e. reshape2::melt(plot_df). In the next version,
## this warning will become an error.
## Warning: The melt generic in data.table has been passed a data.frame and will attempt to redirect to the
## relevant reshape2 method; please note that reshape2 is superseded and is no longer actively developed, and
## this redirection is now deprecated. To continue using melt methods from reshape2 while both libraries are
## attached, e.g. melt.list, you can prepend the namespace, i.e. reshape2::melt(plot_df). In the next version,
## this warning will become an error.
## min_clr -4.023876 
## max_clr -0.6380426 
## min_log -2.665094 
## max_log 2.803749
dotheatmap_linda

PSC effect

pre_LTx vs Healthy and Post_LTx vs Healthy intersection

imagename
imagename
A <- list_intersections[[paste(segment,level,"healthy vs pre_ltx")]]$SeqID
B <- list_intersections[[paste(segment,level,"healthy vs post_ltx")]]$SeqID
C <- intersect(A,B)

psc_effect[[paste(segment,level)]] <- list_intersections[[paste(segment,level, "healthy vs pre_ltx")]][list_intersections[[paste(segment,level, "healthy vs pre_ltx")]]$SeqID %in% C,]
  
# see the results
psc_effect[[paste(segment,level)]] 

Saving results

# ALL DATA
saveWorkbook(supplements_wb,file.path(path,paste0("supplements_uni_analysis_wb_",segment,".xlsx")),overwrite = TRUE)

# PSC effect
write.xlsx(psc_effect,
          file.path(path,paste0("supplements_psc_effect_",segment,".xlsx")))

# SIGNIFICANT taxa
write.xlsx(list_intersections[grepl(segment,names(list_intersections))] %>%
            `names<-`(gsub(segment, "", names(
              list_intersections[grepl(segment,names(list_intersections))]))),
           file.path(path,paste0("supplements_significant_taxa_",segment,".xlsx")))

Machine learning

path = "../results/Q1_czech/models"

ElasticNet

model="enet"

ASV level

level="ASV"
pre_ltx vs healthy
group <- c("pre_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_asv_tab,
                                     ileum_taxa_tab,
                                     ileum_metadata,
                                     group, usage="ml_clr")
## Removing 1478 ASV(s)
## Removing 118 ASV(s)
# fit the model
enet_model <- glmnet_binomial(filt_ileum_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                              reuse=TRUE,
                              file=model_name,
                              Q="Q1_czech")

# ROC curve
roc_c <- roc_curve(enet_model, group)
## Warning in geom_line(aes(x = `1-specificity`, y = sensitivity, by = name, : Ignoring unknown aesthetics: by
# save the results
models_summ <- list()
models_cm <- list()
betas <- list()
roc_cs <- list()

models_summ[[model_name]] <- enet_model$model_summary
models_cm[[model_name]] <- enet_model$conf_matrices$original
roc_cs[[model_name]] <- enet_model$kfold_rocobjs
betas[[model_name]] <- as.matrix(enet_model$betas)

# see the results
enet_model$model_summary %>% t()
##                                      [,1]
## alpha                           0.0000000
## lambda                          4.4297736
## auc                             1.0000000
## auc_czech                       1.0000000
## auc_no                                NaN
## auc_optimism_corrected          0.9125173
## auc_optimism_corrected_CIL      0.7460227
## auc_optimism_corrected_CIU      0.9719818
## accuracy                        1.0000000
## accuracy_czech                        NaN
## accuracy_no                           NaN
## accuracy_optimism_corrected     0.8436514
## accuracy_optimism_corrected_CIL 0.6576087
## accuracy_optimism_corrected_CIU 0.9047619
enet_model$conf_matrices
## $original
##     Predicted
## True  0  1
##    0 37  0
##    1  0 24
## 
## $czech
##     Predicted
## True  0  1
##    0 37  0
##    1  0 24
## 
## $no
## [1] NaN
enet_model$plot
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

roc_c

pre_ltx vs post_ltx
group <- c("pre_ltx","post_ltx")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_asv_tab,
                                     ileum_taxa_tab,
                                     ileum_metadata,
                                     group, 
                                     usage="ml_clr")
## Removing 555 ASV(s)
## Removing 36 ASV(s)
# fit the model
enet_model <- glmnet_binomial(filt_ileum_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                              reuse=TRUE,
                              file=model_name,
                              Q="Q1_czech")

# ROC
roc_c <- roc_curve(enet_model, group)
## Warning in geom_line(aes(x = `1-specificity`, y = sensitivity, by = name, : Ignoring unknown aesthetics: by
# save the results
models_summ[[model_name]] <- enet_model$model_summary
models_cm[[model_name]] <- enet_model$conf_matrices$original
roc_cs[[model_name]] <- enet_model$kfold_rocobjs
betas[[model_name]] <- as.matrix(enet_model$betas)

# see the results
enet_model$model_summary %>% t()
##                                       [,1]
## alpha                            0.0000000
## lambda                          23.6156255
## auc                              0.9485294
## auc_czech                        0.9485294
## auc_no                                 NaN
## auc_optimism_corrected           0.6948681
## auc_optimism_corrected_CIL       0.5824476
## auc_optimism_corrected_CIU       0.8110143
## accuracy                         0.8095238
## accuracy_czech                         NaN
## accuracy_no                            NaN
## accuracy_optimism_corrected      0.7919350
## accuracy_optimism_corrected_CIL  0.7250614
## accuracy_optimism_corrected_CIU  0.8608075
enet_model$conf_matrices
## $original
##     0  
## 0 102 0
## 1  24 0
## 
## $czech
##     0  
## 0 102 0
## 1  24 0
## 
## $no
## [1] NaN
enet_model$plot
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

roc_c

post_ltx vs healthy
group <- c("post_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_asv_tab,
                                     ileum_taxa_tab,
                                     ileum_metadata,
                                     group, 
                                     usage="ml_clr")
## Removing 260 ASV(s)
## Removing 48 ASV(s)
# fit the model
enet_model <- glmnet_binomial(filt_ileum_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                              reuse=TRUE,
                              file=model_name,
                              Q="Q1_czech")

# ROC
roc_c <- roc_curve(enet_model, group)
## Warning in geom_line(aes(x = `1-specificity`, y = sensitivity, by = name, : Ignoring unknown aesthetics: by
# save the results
models_summ[[model_name]] <- enet_model$model_summary
models_cm[[model_name]] <- enet_model$conf_matrices$original
roc_cs[[model_name]] <- enet_model$kfold_rocobjs
betas[[model_name]] <- as.matrix(enet_model$betas)

# see the results
enet_model$model_summary %>% t()
##                                       [,1]
## alpha                           0.40000000
## lambda                          0.03937419
## auc                             1.00000000
## auc_czech                       1.00000000
## auc_no                                 NaN
## auc_optimism_corrected          0.94206423
## auc_optimism_corrected_CIL      0.90105239
## auc_optimism_corrected_CIU      0.98566042
## accuracy                        1.00000000
## accuracy_czech                         NaN
## accuracy_no                            NaN
## accuracy_optimism_corrected     0.84400802
## accuracy_optimism_corrected_CIL 0.76024411
## accuracy_optimism_corrected_CIU 0.91915820
enet_model$conf_matrices
## $original
##     Predicted
## True   0   1
##    0  37   0
##    1   0 102
## 
## $czech
##     Predicted
## True   0   1
##    0  37   0
##    1   0 102
## 
## $no
## [1] NaN
enet_model$plot
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

roc_c

Genus level

level="genus"

Aggregate taxa

genus_data <- aggregate_taxa(ileum_asv_tab,
                             ileum_taxa_tab,
                             taxonomic_level = level)

ileum_genus_tab <- genus_data[[1]]
ileum_genus_taxa_tab <- genus_data[[2]]
pre_ltx vs healthy
group <- c("pre_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_genus_tab,
                                     ileum_genus_taxa_tab,
                                     ileum_metadata,
                                     group, 
                                     usage="ml_clr")
## Removing 93 ASV(s)
## Removing 8 ASV(s)
# fit the model
enet_model <- glmnet_binomial(filt_ileum_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                              reuse=TRUE,
                              file=model_name,
                              Q="Q1_czech")

# ROC
roc_c <- roc_curve(enet_model, group)
## Warning in geom_line(aes(x = `1-specificity`, y = sensitivity, by = name, : Ignoring unknown aesthetics: by
# save the results
models_summ[[model_name]] <- enet_model$model_summary
models_cm[[model_name]] <- enet_model$conf_matrices$original
roc_cs[[model_name]] <- enet_model$kfold_rocobjs
betas[[model_name]] <- as.matrix(enet_model$betas)

# see the results
enet_model$model_summary %>% t()
##                                      [,1]
## alpha                           0.2000000
## lambda                          0.1579673
## auc                             1.0000000
## auc_czech                       1.0000000
## auc_no                                NaN
## auc_optimism_corrected          0.9130841
## auc_optimism_corrected_CIL      0.8491286
## auc_optimism_corrected_CIU      0.9958333
## accuracy                        1.0000000
## accuracy_czech                        NaN
## accuracy_no                           NaN
## accuracy_optimism_corrected     0.8302828
## accuracy_optimism_corrected_CIL 0.6741848
## accuracy_optimism_corrected_CIU 0.9081169
enet_model$conf_matrices
## $original
##     Predicted
## True  0  1
##    0 37  0
##    1  0 24
## 
## $czech
##     Predicted
## True  0  1
##    0 37  0
##    1  0 24
## 
## $no
## [1] NaN
enet_model$plot
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

roc_c

pre_ltx vs post_ltx
group <- c("pre_ltx","post_ltx")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_genus_tab,
                                     ileum_genus_taxa_tab,
                                     ileum_metadata,
                                     group, 
                                     usage="ml_clr")
## Removing 41 ASV(s)
## Removing 3 ASV(s)
# fit the model
enet_model <- glmnet_binomial(filt_ileum_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", N=10,
                              reuse=TRUE,
                              file=model_name,
                              Q="Q1_czech")

# ROC
roc_c <- roc_curve(enet_model, group)
## Warning in geom_line(aes(x = `1-specificity`, y = sensitivity, by = name, : Ignoring unknown aesthetics: by
# save the results
models_summ[[model_name]] <- enet_model$model_summary
models_cm[[model_name]] <- enet_model$conf_matrices$original
roc_cs[[model_name]] <- enet_model$kfold_rocobjs
betas[[model_name]] <- as.matrix(enet_model$betas)

# see the results
enet_model$model_summary %>% t()
##                                      [,1]
## alpha                           0.0000000
## lambda                          4.8657540
## auc                             0.9485294
## auc_czech                       0.9485294
## auc_no                                NaN
## auc_optimism_corrected          0.7094762
## auc_optimism_corrected_CIL      0.6273274
## auc_optimism_corrected_CIU      0.8340541
## accuracy                        0.8095238
## accuracy_czech                        NaN
## accuracy_no                           NaN
## accuracy_optimism_corrected     0.7751889
## accuracy_optimism_corrected_CIL 0.6955785
## accuracy_optimism_corrected_CIU 0.8571172
enet_model$conf_matrices
## $original
##     0  
## 0 102 0
## 1  24 0
## 
## $czech
##     0  
## 0 102 0
## 1  24 0
## 
## $no
## [1] NaN
enet_model$plot
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

roc_c

post_ltx vs healthy
group <- c("post_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_genus_tab,
                                     ileum_genus_taxa_tab,
                                     ileum_metadata,group, 
                                     usage="ml_clr")
## Removing 16 ASV(s)
## Removing 2 ASV(s)
# fit the model
enet_model <- glmnet_binomial(filt_ileum_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                              reuse=TRUE,
                              file=model_name,
                              Q="Q1_czech")

# ROC
roc_c <- roc_curve(enet_model, group)
## Warning in geom_line(aes(x = `1-specificity`, y = sensitivity, by = name, : Ignoring unknown aesthetics: by
# save the results
models_summ[[model_name]] <- enet_model$model_summary
models_cm[[model_name]] <- enet_model$conf_matrices$original
roc_cs[[model_name]] <- enet_model$kfold_rocobjs
betas[[model_name]] <- as.matrix(enet_model$betas)

# see the results
enet_model$model_summary %>% t()
##                                       [,1]
## alpha                           0.40000000
## lambda                          0.02563917
## auc                             1.00000000
## auc_czech                       1.00000000
## auc_no                                 NaN
## auc_optimism_corrected          0.95484421
## auc_optimism_corrected_CIL      0.92410714
## auc_optimism_corrected_CIU      0.99290392
## accuracy                        1.00000000
## accuracy_czech                         NaN
## accuracy_no                            NaN
## accuracy_optimism_corrected     0.88512801
## accuracy_optimism_corrected_CIL 0.83058511
## accuracy_optimism_corrected_CIU 0.93830733
enet_model$conf_matrices
## $original
##     Predicted
## True   0   1
##    0  37   0
##    1   0 102
## 
## $czech
##     Predicted
## True   0   1
##    0  37   0
##    1   0 102
## 
## $no
## [1] NaN
enet_model$plot
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

roc_c

Saving results

models_summ_df_ileum <- do.call(rbind, 
  models_summ[grep(segment,names(models_summ),value = TRUE)])

write.csv(models_summ_df_ileum,file.path(path,paste0("elastic_net_",segment,".csv")))

Supplementary models

supplements_models <- list()

CLR-transformed data

kNN
model="knn"
ASV level
level="ASV"

pre_ltx vs healthy

group <- c("pre_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_asv_tab,
                                     ileum_taxa_tab,
                                     ileum_metadata,
                                     group, usage="ml_clr")
## Removing 1478 ASV(s)
## Removing 118 ASV(s)
# fit the model
knn_model <- knn_binomial(filt_ileum_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                              reuse=TRUE,
                              file=model_name,
                              Q="Q1_czech")

# ROC curve
roc_c <- roc_curve(knn_model, group)
## Warning in geom_line(aes(x = `1-specificity`, y = sensitivity, by = name, : Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["knn_model"]][[model_name]] <- knn_model$model_summary
supplements_models[["roc_cs"]][["knn_model"]][[model_name]] <- knn_model$kfold_rocobjs

# see the results
knn_model$model_summary %>% t()
##                                       [,1]
## k                               19.0000000
## auc                              0.9481982
## auc_optimism_corrected           0.9111788
## auc_optimism_corrected_CIL       0.8459517
## auc_optimism_corrected_CIU       0.9668485
## accuracy                         0.8852459
## accuracy_optimism_corrected      0.8197064
## accuracy_optimism_corrected_CIL  0.7415761
## accuracy_optimism_corrected_CIU  0.9047619
roc_c

pre_ltx vs post_ltx

group <- c("pre_ltx","post_ltx")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)


# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_asv_tab,
                                     ileum_taxa_tab,
                                     ileum_metadata,
                                     group, usage="ml_clr")
## Removing 555 ASV(s)
## Removing 36 ASV(s)
# fit the model
knn_model <- knn_binomial(filt_ileum_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                              reuse=TRUE,
                              file=model_name,
                              Q="Q1_czech")

# ROC curve
roc_c <- roc_curve(knn_model, group)
## Warning in geom_line(aes(x = `1-specificity`, y = sensitivity, by = name, : Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["knn_model"]][[model_name]] <- knn_model$model_summary
supplements_models[["roc_cs"]][["knn_model"]][[model_name]] <- knn_model$kfold_rocobjs


# see the results
knn_model$model_summary %>% t()
##                                       [,1]
## k                               11.0000000
## auc                              0.8386438
## auc_optimism_corrected           0.6766450
## auc_optimism_corrected_CIL       0.5849351
## auc_optimism_corrected_CIU       0.7871080
## accuracy                         0.8333333
## accuracy_optimism_corrected      0.7752687
## accuracy_optimism_corrected_CIL  0.7326574
## accuracy_optimism_corrected_CIU  0.8345170
roc_c

post_ltx vs healthy

group <- c("post_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_asv_tab,
                                     ileum_taxa_tab,
                                     ileum_metadata,
                                     group, usage="ml_clr")
## Removing 260 ASV(s)
## Removing 48 ASV(s)
# fit the model
knn_model <- knn_binomial(filt_ileum_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                              reuse=TRUE,
                              file=model_name,
                              Q="Q1_czech")

# ROC curve
roc_c <- roc_curve(knn_model, group)
## Warning in geom_line(aes(x = `1-specificity`, y = sensitivity, by = name, : Ignoring unknown aesthetics: by
# save the results
# save the results
supplements_models[["models_summ"]][["knn_model"]][[model_name]] <- knn_model$model_summary
supplements_models[["roc_cs"]][["knn_model"]][[model_name]] <- knn_model$kfold_rocobjs

# see the results
knn_model$model_summary %>% t()
##                                       [,1]
## k                               12.0000000
## auc                              0.9048755
## auc_optimism_corrected           0.8180820
## auc_optimism_corrected_CIL       0.7052114
## auc_optimism_corrected_CIU       0.9524700
## accuracy                         0.8848921
## accuracy_optimism_corrected      0.7978793
## accuracy_optimism_corrected_CIL  0.7233586
## accuracy_optimism_corrected_CIU  0.8995192
roc_c

Genus level
level="genus"

Aggregate taxa

genus_data <- aggregate_taxa(ileum_asv_tab,
                             ileum_taxa_tab,
                             taxonomic_level = level)

ileum_genus_tab <- genus_data[[1]]
ileum_genus_taxa_tab <- genus_data[[2]]

pre_ltx vs healthy

group <- c("pre_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_genus_tab,
                                     ileum_genus_taxa_tab,
                                     ileum_metadata,
                                     group, 
                                     usage="ml_clr")
## Removing 93 ASV(s)
## Removing 8 ASV(s)
# fit the model
knn_model <- knn_binomial(filt_ileum_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                              reuse=TRUE,
                              file=model_name,
                              Q="Q1_czech")

# ROC curve
roc_c <- roc_curve(knn_model, group)
## Warning in geom_line(aes(x = `1-specificity`, y = sensitivity, by = name, : Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["knn_model"]][[model_name]] <- knn_model$model_summary
supplements_models[["roc_cs"]][["knn_model"]][[model_name]] <- knn_model$kfold_rocobjs


# see the results
knn_model$model_summary %>% t()
##                                       [,1]
## k                               10.0000000
## auc                              0.9549550
## auc_optimism_corrected           0.8580076
## auc_optimism_corrected_CIL       0.7908357
## auc_optimism_corrected_CIU       0.9711779
## accuracy                         0.7540984
## accuracy_optimism_corrected      0.7098720
## accuracy_optimism_corrected_CIL  0.6110248
## accuracy_optimism_corrected_CIU  0.8055195
roc_c

pre_ltx vs post_ltx

group <- c("pre_ltx","post_ltx")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_genus_tab,
                                     ileum_genus_taxa_tab,
                                     ileum_metadata,
                                     group, 
                                     usage="ml_clr")
## Removing 41 ASV(s)
## Removing 3 ASV(s)
# fit the model
knn_model <- knn_binomial(filt_ileum_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                              reuse=TRUE,
                              file=model_name,
                              Q="Q1_czech")

# ROC curve
roc_c <- roc_curve(knn_model, group)
## Warning in geom_line(aes(x = `1-specificity`, y = sensitivity, by = name, : Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["knn_model"]][[model_name]] <- knn_model$model_summary
supplements_models[["roc_cs"]][["knn_model"]][[model_name]] <- knn_model$kfold_rocobjs


# see the results
knn_model$model_summary %>% t()
##                                       [,1]
## k                               14.0000000
## auc                              0.8415033
## auc_optimism_corrected           0.6934488
## auc_optimism_corrected_CIL       0.5589627
## auc_optimism_corrected_CIU       0.8054960
## accuracy                         0.8174603
## accuracy_optimism_corrected      0.7932043
## accuracy_optimism_corrected_CIL  0.7117243
## accuracy_optimism_corrected_CIU  0.8937027
roc_c

post_ltx vs healthy

group <- c("post_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_genus_tab,
                                     ileum_genus_taxa_tab,
                                     ileum_metadata,
                                     group, 
                                     usage="ml_clr")
## Removing 16 ASV(s)
## Removing 2 ASV(s)
# fit the model
knn_model <- knn_binomial(filt_ileum_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                              reuse=TRUE,
                              file=model_name,
                              Q="Q1_czech")

# ROC curve
roc_c <- roc_curve(knn_model, group)
## Warning in geom_line(aes(x = `1-specificity`, y = sensitivity, by = name, : Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["knn_model"]][[model_name]] <- knn_model$model_summary
supplements_models[["roc_cs"]][["knn_model"]][[model_name]] <- knn_model$kfold_rocobjs


# see the results
knn_model$model_summary %>% t()
##                                       [,1]
## k                               19.0000000
## auc                              0.9670111
## auc_optimism_corrected           0.8924815
## auc_optimism_corrected_CIL       0.8406156
## auc_optimism_corrected_CIU       0.9616798
## accuracy                         0.8848921
## accuracy_optimism_corrected      0.8002149
## accuracy_optimism_corrected_CIL  0.7323864
## accuracy_optimism_corrected_CIU  0.8894930
roc_c

Random Forest
model="rf"
ASV level
level="ASV"

pre_ltx vs healthy

group <- c("pre_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_asv_tab,
                                     ileum_taxa_tab,
                                     ileum_metadata,
                                     group, usage="ml_clr")
## Removing 1478 ASV(s)
## Removing 118 ASV(s)
# fit the model
rf_model <- rf_binomial(filt_ileum_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                              reuse=TRUE,
                              file=model_name,
                              Q="Q1_czech")

# ROC curve
roc_c <- roc_curve(rf_model, group)
## Warning in geom_line(aes(x = `1-specificity`, y = sensitivity, by = name, : Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["rf_model"]][[model_name]] <- rf_model$model_summary
supplements_models[["roc_cs"]][["rf_model"]][[model_name]] <- rf_model$kfold_rocobjs

# see the results
rf_model$model_summary %>% t()
##                                 [,1]       
## mtry                            "13"       
## splitrule                       "gini"     
## min.node.size                   "2"        
## auc                             "1"        
## auc_optimism_corrected          "0.8287759"
## auc_optimism_corrected_CIL      "0.5771336"
## auc_optimism_corrected_CIU      "0.9839286"
## accuracy                        "1"        
## accuracy_optimism_corrected     "0.7672228"
## accuracy_optimism_corrected_CIL "0.6215217"
## accuracy_optimism_corrected_CIU "0.8724432"
roc_c

pre_ltx vs post_ltx

group <- c("pre_ltx","post_ltx")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_asv_tab,
                                     ileum_taxa_tab,
                                     ileum_metadata,
                                     group, usage="ml_clr")
## Removing 555 ASV(s)
## Removing 36 ASV(s)
# fit the model
rf_model <- rf_binomial(filt_ileum_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                              reuse=TRUE,
                              file=model_name,
                              Q="Q1_czech")

# ROC curve
roc_c <- roc_curve(rf_model, group)
## Warning in geom_line(aes(x = `1-specificity`, y = sensitivity, by = name, : Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["rf_model"]][[model_name]] <- rf_model$model_summary
supplements_models[["roc_cs"]][["rf_model"]][[model_name]] <- rf_model$kfold_rocobjs

# see the results
rf_model$model_summary %>% t()
##                                 [,1]       
## mtry                            "43"       
## splitrule                       "gini"     
## min.node.size                   "5"        
## auc                             "1"        
## auc_optimism_corrected          "0.6561724"
## auc_optimism_corrected_CIL      "0.5778259"
## auc_optimism_corrected_CIU      "0.7348714"
## accuracy                        "1"        
## accuracy_optimism_corrected     "0.7743705"
## accuracy_optimism_corrected_CIL "0.696875" 
## accuracy_optimism_corrected_CIU "0.8384067"
roc_c

post_ltx vs healthy

group <- c("post_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_asv_tab,
                                     ileum_taxa_tab,
                                     ileum_metadata,
                                     group, usage="ml_clr")
## Removing 260 ASV(s)
## Removing 48 ASV(s)
# fit the model
rf_model <- rf_binomial(filt_ileum_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                              reuse=TRUE,
                              file=model_name,
                              Q="Q1_czech")

# ROC curve
roc_c <- roc_curve(rf_model, group)
## Warning in geom_line(aes(x = `1-specificity`, y = sensitivity, by = name, : Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["rf_model"]][[model_name]] <- rf_model$model_summary
supplements_models[["roc_cs"]][["rf_model"]][[model_name]] <- rf_model$kfold_rocobjs

# see the results
rf_model$model_summary %>% t()
##                                 [,1]       
## mtry                            "149"      
## splitrule                       "gini"     
## min.node.size                   "5"        
## auc                             "1"        
## auc_optimism_corrected          "0.8650417"
## auc_optimism_corrected_CIL      "0.7467677"
## auc_optimism_corrected_CIU      "0.948142" 
## accuracy                        "1"        
## accuracy_optimism_corrected     "0.846914" 
## accuracy_optimism_corrected_CIL "0.8"      
## accuracy_optimism_corrected_CIU "0.916964"
roc_c

Genus level
level="genus"

Aggregate taxa

genus_data <- aggregate_taxa(ileum_asv_tab,
                             ileum_taxa_tab,
                             taxonomic_level = level)

ileum_genus_tab <- genus_data[[1]]
ileum_genus_taxa_tab <- genus_data[[2]]

pre_ltx vs healthy

group <- c("pre_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_genus_tab,
                                     ileum_genus_taxa_tab,
                                     ileum_metadata,
                                     group, 
                                     usage="ml_clr")
## Removing 93 ASV(s)
## Removing 8 ASV(s)
# fit the model
rf_model <- rf_binomial(filt_ileum_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                              reuse=TRUE,
                              file=model_name,
                              Q="Q1_czech")

# ROC curve
roc_c <- roc_curve(rf_model, group)
## Warning in geom_line(aes(x = `1-specificity`, y = sensitivity, by = name, : Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["rf_model"]][[model_name]] <- rf_model$model_summary
supplements_models[["roc_cs"]][["rf_model"]][[model_name]] <- rf_model$kfold_rocobjs

# see the results
rf_model$model_summary %>% t()
##                                 [,1]       
## mtry                            "121"      
## splitrule                       "gini"     
## min.node.size                   "2"        
## auc                             "1"        
## auc_optimism_corrected          "0.8817009"
## auc_optimism_corrected_CIL      "0.7688763"
## auc_optimism_corrected_CIU      "0.9631799"
## accuracy                        "1"        
## accuracy_optimism_corrected     "0.760551" 
## accuracy_optimism_corrected_CIL "0.6504891"
## accuracy_optimism_corrected_CIU "0.8621753"
roc_c

pre_ltx vs post_ltx

group <- c("pre_ltx","post_ltx")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_genus_tab,
                                     ileum_genus_taxa_tab,
                                     ileum_metadata,
                                     group, 
                                     usage="ml_clr")
## Removing 41 ASV(s)
## Removing 3 ASV(s)
# fit the model
rf_model <- rf_binomial(filt_ileum_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                              reuse=TRUE,
                              file=model_name,
                              Q="Q1_czech")

# ROC curve
roc_c <- roc_curve(rf_model, group)
## Warning in geom_line(aes(x = `1-specificity`, y = sensitivity, by = name, : Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["rf_model"]][[model_name]] <- rf_model$model_summary
supplements_models[["roc_cs"]][["rf_model"]][[model_name]] <- rf_model$kfold_rocobjs

# see the results
rf_model$model_summary %>% t()
##                                 [,1]       
## mtry                            "7"        
## splitrule                       "gini"     
## min.node.size                   "2"        
## auc                             "1"        
## auc_optimism_corrected          "0.7082342"
## auc_optimism_corrected_CIL      "0.5029177"
## auc_optimism_corrected_CIU      "0.8629418"
## accuracy                        "1"        
## accuracy_optimism_corrected     "0.7716928"
## accuracy_optimism_corrected_CIL "0.7083333"
## accuracy_optimism_corrected_CIU "0.8392045"
roc_c

post_ltx vs healthy

group <- c("post_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_genus_tab,
                                     ileum_genus_taxa_tab,
                                     ileum_metadata,
                                     group, 
                                     usage="ml_clr")
## Removing 16 ASV(s)
## Removing 2 ASV(s)
# fit the model
rf_model <- rf_binomial(filt_ileum_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                              reuse=TRUE,
                              file=model_name,
                              Q="Q1_czech")

# ROC curve
roc_c <- roc_curve(rf_model, group)
## Warning in geom_line(aes(x = `1-specificity`, y = sensitivity, by = name, : Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["rf_model"]][[model_name]] <- rf_model$model_summary
supplements_models[["roc_cs"]][["rf_model"]][[model_name]] <- rf_model$kfold_rocobjs

# see the results
rf_model$model_summary %>% t()
##                                 [,1]       
## mtry                            "33"       
## splitrule                       "gini"     
## min.node.size                   "5"        
## auc                             "1"        
## auc_optimism_corrected          "0.8529435"
## auc_optimism_corrected_CIL      "0.7198997"
## auc_optimism_corrected_CIU      "0.9273955"
## accuracy                        "1"        
## accuracy_optimism_corrected     "0.8159999"
## accuracy_optimism_corrected_CIL "0.7118762"
## accuracy_optimism_corrected_CIU "0.9344975"
roc_c

Gradient boosting
model="gb"
ASV level
level="ASV"

pre_ltx vs healthy

group <- c("pre_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_asv_tab,
                                     ileum_taxa_tab,
                                     ileum_metadata,
                                     group, usage="ml_clr")
## Removing 1478 ASV(s)
## Removing 118 ASV(s)
# fit the model
gbm_model <- gbm_binomial(filt_ileum_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                              reuse=TRUE,
                              file=model_name,
                              Q="Q1_czech")

# ROC curve
roc_c <- roc_curve(gbm_model, group)
## Warning in geom_line(aes(x = `1-specificity`, y = sensitivity, by = name, : Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["gbm_model"]][[model_name]] <- gbm_model$model_summary
supplements_models[["roc_cs"]][["gbm_model"]][[model_name]] <- gbm_model$kfold_rocobjs

# see the results
gbm_model$model_summary %>% t()
##                                 [,1]       
## mtry                            "33"       
## splitrule                       "gini"     
## min.node.size                   "5"        
## auc                             "1"        
## auc_optimism_corrected          "0.8529435"
## auc_optimism_corrected_CIL      "0.7198997"
## auc_optimism_corrected_CIU      "0.9273955"
## accuracy                        "1"        
## accuracy_optimism_corrected     "0.8159999"
## accuracy_optimism_corrected_CIL "0.7118762"
## accuracy_optimism_corrected_CIU "0.9344975"
roc_c

pre_ltx vs post_ltx

group <- c("pre_ltx","post_ltx")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_asv_tab,
                                     ileum_taxa_tab,
                                     ileum_metadata,
                                     group, usage="ml_clr")
## Removing 555 ASV(s)
## Removing 36 ASV(s)
# fit the model
gbm_model <- gbm_binomial(filt_ileum_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                              reuse=TRUE,
                              file=model_name,
                              Q="Q1_czech")

# ROC curve
roc_c <- roc_curve(gbm_model, group)
## Warning in geom_line(aes(x = `1-specificity`, y = sensitivity, by = name, : Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["gbm_model"]][[model_name]] <- gbm_model$model_summary
supplements_models[["roc_cs"]][["gbm_model"]][[model_name]] <- gbm_model$kfold_rocobjs


# see the results
gbm_model$model_summary %>% t()
##                                 [,1]       
## mtry                            "33"       
## splitrule                       "gini"     
## min.node.size                   "5"        
## auc                             "1"        
## auc_optimism_corrected          "0.8529435"
## auc_optimism_corrected_CIL      "0.7198997"
## auc_optimism_corrected_CIU      "0.9273955"
## accuracy                        "1"        
## accuracy_optimism_corrected     "0.8159999"
## accuracy_optimism_corrected_CIL "0.7118762"
## accuracy_optimism_corrected_CIU "0.9344975"
roc_c

post_ltx vs healthy

group <- c("post_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_asv_tab,
                                     ileum_taxa_tab,
                                     ileum_metadata,
                                     group, usage="ml_clr")
## Removing 260 ASV(s)
## Removing 48 ASV(s)
# fit the model
gbm_model <- gbm_binomial(filt_ileum_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                              reuse=TRUE,
                              file=model_name,
                              Q="Q1_czech")

# ROC curve
roc_c <- roc_curve(gbm_model, group)
## Warning in geom_line(aes(x = `1-specificity`, y = sensitivity, by = name, : Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["gbm_model"]][[model_name]] <- gbm_model$model_summary
supplements_models[["roc_cs"]][["gbm_model"]][[model_name]] <- gbm_model$kfold_rocobjs


# see the results
gbm_model$model_summary %>% t()
##                                 [,1]       
## mtry                            "33"       
## splitrule                       "gini"     
## min.node.size                   "5"        
## auc                             "1"        
## auc_optimism_corrected          "0.8529435"
## auc_optimism_corrected_CIL      "0.7198997"
## auc_optimism_corrected_CIU      "0.9273955"
## accuracy                        "1"        
## accuracy_optimism_corrected     "0.8159999"
## accuracy_optimism_corrected_CIL "0.7118762"
## accuracy_optimism_corrected_CIU "0.9344975"
roc_c

Genus level
level="genus"

Aggregate taxa

genus_data <- aggregate_taxa(ileum_asv_tab,
                             ileum_taxa_tab,
                             taxonomic_level = level)

ileum_genus_tab <- genus_data[[1]]
ileum_genus_taxa_tab <- genus_data[[2]]

pre_ltx vs healthy

group <- c("pre_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_genus_tab,
                                     ileum_genus_taxa_tab,
                                     ileum_metadata,
                                     group, 
                                     usage="ml_clr")
## Removing 93 ASV(s)
## Removing 8 ASV(s)
# fit the model
gbm_model <- gbm_binomial(filt_ileum_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                              reuse=TRUE,
                              file=model_name,
                              Q="Q1_czech")

# ROC curve
roc_c <- roc_curve(gbm_model, group)
## Warning in geom_line(aes(x = `1-specificity`, y = sensitivity, by = name, : Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["gbm_model"]][[model_name]] <- gbm_model$model_summary
supplements_models[["roc_cs"]][["gbm_model"]][[model_name]] <- gbm_model$kfold_rocobjs

# see the results
gbm_model$model_summary %>% t()
##                                 [,1]       
## mtry                            "33"       
## splitrule                       "gini"     
## min.node.size                   "5"        
## auc                             "1"        
## auc_optimism_corrected          "0.8529435"
## auc_optimism_corrected_CIL      "0.7198997"
## auc_optimism_corrected_CIU      "0.9273955"
## accuracy                        "1"        
## accuracy_optimism_corrected     "0.8159999"
## accuracy_optimism_corrected_CIL "0.7118762"
## accuracy_optimism_corrected_CIU "0.9344975"
roc_c

pre_ltx vs post_ltx

group <- c("pre_ltx","post_ltx")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_genus_tab,
                                     ileum_genus_taxa_tab,
                                     ileum_metadata,
                                     group, 
                                     usage="ml_clr")
## Removing 41 ASV(s)
## Removing 3 ASV(s)
# fit the model
gbm_model <- gbm_binomial(filt_ileum_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                              reuse=TRUE,
                              file=model_name,
                              Q="Q1_czech")

# ROC curve
roc_c <- roc_curve(gbm_model, group)
## Warning in geom_line(aes(x = `1-specificity`, y = sensitivity, by = name, : Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["gbm_model"]][[model_name]] <- gbm_model$model_summary
supplements_models[["roc_cs"]][["gbm_model"]][[model_name]] <- gbm_model$kfold_rocobjs

# see the results
gbm_model$model_summary %>% t()
##                                 [,1]       
## mtry                            "33"       
## splitrule                       "gini"     
## min.node.size                   "5"        
## auc                             "1"        
## auc_optimism_corrected          "0.8529435"
## auc_optimism_corrected_CIL      "0.7198997"
## auc_optimism_corrected_CIU      "0.9273955"
## accuracy                        "1"        
## accuracy_optimism_corrected     "0.8159999"
## accuracy_optimism_corrected_CIL "0.7118762"
## accuracy_optimism_corrected_CIU "0.9344975"
roc_c

post_ltx vs healthy

group <- c("post_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_genus_tab,
                                     ileum_genus_taxa_tab,
                                     ileum_metadata,
                                     group, 
                                     usage="ml_clr")
## Removing 16 ASV(s)
## Removing 2 ASV(s)
# fit the model
gbm_model <- gbm_binomial(filt_ileum_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                              reuse=TRUE,
                              file=model_name,
                              Q="Q1_czech")

# ROC curve
roc_c <- roc_curve(gbm_model, group)
## Warning in geom_line(aes(x = `1-specificity`, y = sensitivity, by = name, : Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["gbm_model"]][[model_name]] <- gbm_model$model_summary
supplements_models[["roc_cs"]][["gbm_model"]][[model_name]] <- gbm_model$kfold_rocobjs

# see the results
gbm_model$model_summary %>% t()
##                                 [,1]       
## mtry                            "33"       
## splitrule                       "gini"     
## min.node.size                   "5"        
## auc                             "1"        
## auc_optimism_corrected          "0.8529435"
## auc_optimism_corrected_CIL      "0.7198997"
## auc_optimism_corrected_CIU      "0.9273955"
## accuracy                        "1"        
## accuracy_optimism_corrected     "0.8159999"
## accuracy_optimism_corrected_CIL "0.7118762"
## accuracy_optimism_corrected_CIU "0.9344975"
roc_c

Relative abundances

Elastic net
ASV level
level="ASV"

pre_ltx vs healthy

group <- c("pre_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_asv_tab,
                                     ileum_taxa_tab,
                                     ileum_metadata,
                                     group, usage="ml_ra")
## Removing 1478 ASV(s)
## Removing 118 ASV(s)
# fit the model
enet_model <- glmnet_binomial(filt_ileum_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                              reuse=TRUE,
                              file=model_name,
                              Q="Q1_czech")

# ROC curve
roc_c <- roc_curve(enet_model, group)
## Warning in geom_line(aes(x = `1-specificity`, y = sensitivity, by = name, : Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["enet_model_ra"]][[model_name]] <- enet_model$model_summary

supplements_models[["roc_cs"]][["enet_model_ra"]][[model_name]] <- enet_model$kfold_rocobjs

# see the results
enet_model$model_summary %>% t()
##                                      [,1]
## alpha                           0.2000000
## lambda                          0.3778764
## auc                             1.0000000
## auc_czech                       1.0000000
## auc_no                                NaN
## auc_optimism_corrected          0.8454954
## auc_optimism_corrected_CIL      0.7061699
## auc_optimism_corrected_CIU      0.9437710
## accuracy                        1.0000000
## accuracy_czech                        NaN
## accuracy_no                           NaN
## accuracy_optimism_corrected     0.7949445
## accuracy_optimism_corrected_CIL 0.6998447
## accuracy_optimism_corrected_CIU 0.8709821
roc_c

pre_ltx vs post_ltx

group <- c("pre_ltx","post_ltx")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_asv_tab,
                                     ileum_taxa_tab,
                                     ileum_metadata,
                                     group, usage="ml_ra")
## Removing 555 ASV(s)
## Removing 36 ASV(s)
# fit the model
enet_model <- glmnet_binomial(filt_ileum_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                              reuse=TRUE,
                              file=model_name,
                              Q="Q1_czech")

# ROC curve
roc_c <- roc_curve(enet_model, group)
## Warning in geom_line(aes(x = `1-specificity`, y = sensitivity, by = name, : Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["enet_model_ra"]][[model_name]] <- enet_model$model_summary

supplements_models[["roc_cs"]][["enet_model_ra"]][[model_name]] <- enet_model$kfold_rocobjs

# see the results
enet_model$model_summary %>% t()
##                                        [,1]
## alpha                             0.0000000
## lambda                          102.5526002
## auc                               0.9971405
## auc_czech                         0.9971405
## auc_no                                  NaN
## auc_optimism_corrected            0.5652716
## auc_optimism_corrected_CIL        0.3719551
## auc_optimism_corrected_CIU        0.8215233
## accuracy                          0.8095238
## accuracy_czech                          NaN
## accuracy_no                             NaN
## accuracy_optimism_corrected       0.7872688
## accuracy_optimism_corrected_CIL   0.7117021
## accuracy_optimism_corrected_CIU   0.8568182
roc_c

post_ltx vs healthy

group <- c("post_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)


# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_asv_tab,
                                     ileum_taxa_tab,
                                     ileum_metadata,
                                     group, usage="ml_ra")
## Removing 260 ASV(s)
## Removing 48 ASV(s)
# fit the model
enet_model <- glmnet_binomial(filt_ileum_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                              reuse=TRUE,
                              file=model_name,
                              Q="Q1_czech")

# ROC curve
roc_c <- roc_curve(enet_model, group)
## Warning in geom_line(aes(x = `1-specificity`, y = sensitivity, by = name, : Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["enet_model_ra"]][[model_name]] <- enet_model$model_summary

supplements_models[["roc_cs"]][["enet_model_ra"]][[model_name]] <- enet_model$kfold_rocobjs

# see the results
enet_model$model_summary %>% t()
##                                      [,1]
## alpha                           0.0000000
## lambda                          3.5362260
## auc                             1.0000000
## auc_czech                       1.0000000
## auc_no                                NaN
## auc_optimism_corrected          0.7951580
## auc_optimism_corrected_CIL      0.7080357
## auc_optimism_corrected_CIU      0.8688125
## accuracy                        0.8776978
## accuracy_czech                        NaN
## accuracy_no                           NaN
## accuracy_optimism_corrected     0.7518252
## accuracy_optimism_corrected_CIL 0.6646915
## accuracy_optimism_corrected_CIU 0.8251270
roc_c

Genus level
level="genus"

Aggregate taxa

genus_data <- aggregate_taxa(ileum_asv_tab,
                             ileum_taxa_tab,
                             taxonomic_level = level)

ileum_genus_tab <- genus_data[[1]]
ileum_genus_taxa_tab <- genus_data[[2]]

pre_ltx vs healthy

group <- c("pre_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_genus_tab,
                                     ileum_genus_taxa_tab,
                                     ileum_metadata,
                                     group, 
                                     usage="ml_ra")
## Removing 93 ASV(s)
## Removing 8 ASV(s)
# fit the model
enet_model <- glmnet_binomial(filt_ileum_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                              reuse=TRUE,
                              file=model_name,
                              Q="Q1_czech")

# ROC curve
roc_c <- roc_curve(enet_model, group)
## Warning in geom_line(aes(x = `1-specificity`, y = sensitivity, by = name, : Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["enet_model_ra"]][[model_name]] <- enet_model$model_summary

supplements_models[["roc_cs"]][["enet_model_ra"]][[model_name]] <- enet_model$kfold_rocobjs

# see the results
enet_model$model_summary %>% t()
##                                      [,1]
## alpha                           0.0000000
## lambda                          8.2397352
## auc                             1.0000000
## auc_czech                       1.0000000
## auc_no                                NaN
## auc_optimism_corrected          0.8509168
## auc_optimism_corrected_CIL      0.7440741
## auc_optimism_corrected_CIU      0.9829545
## accuracy                        0.8032787
## accuracy_czech                        NaN
## accuracy_no                           NaN
## accuracy_optimism_corrected     0.7805345
## accuracy_optimism_corrected_CIL 0.6731884
## accuracy_optimism_corrected_CIU 0.9339583
roc_c

pre_ltx vs post_ltx

group <- c("pre_ltx","post_ltx")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)


# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_genus_tab,
                                     ileum_genus_taxa_tab,
                                     ileum_metadata,
                                     group, 
                                     usage="ml_ra")
## Removing 41 ASV(s)
## Removing 3 ASV(s)
# fit the model
enet_model <- glmnet_binomial(filt_ileum_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                              reuse=TRUE,
                              file=model_name,
                              Q="Q1_czech")

# ROC curve
roc_c <- roc_curve(enet_model, group)
## Warning in geom_line(aes(x = `1-specificity`, y = sensitivity, by = name, : Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["enet_model_ra"]][[model_name]] <- enet_model$model_summary

supplements_models[["roc_cs"]][["enet_model_ra"]][[model_name]] <- enet_model$kfold_rocobjs

# see the results
enet_model$model_summary %>% t()
##                                        [,1]
## alpha                             0.0000000
## lambda                          130.6178958
## auc                               0.9791667
## auc_czech                         0.9791667
## auc_no                                  NaN
## auc_optimism_corrected            0.6084429
## auc_optimism_corrected_CIL        0.4915927
## auc_optimism_corrected_CIU        0.7402786
## accuracy                          0.8095238
## accuracy_czech                          NaN
## accuracy_no                             NaN
## accuracy_optimism_corrected       0.7930146
## accuracy_optimism_corrected_CIL   0.7350962
## accuracy_optimism_corrected_CIU   0.8823266
roc_c

post_ltx vs healthy

group <- c("post_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_genus_tab,
                                     ileum_genus_taxa_tab,
                                     ileum_metadata,
                                     group, 
                                     usage="ml_ra")
## Removing 16 ASV(s)
## Removing 2 ASV(s)
# fit the model
enet_model <- glmnet_binomial(filt_ileum_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                              reuse=TRUE,
                              file=model_name,
                              Q="Q1_czech")

# ROC curve
roc_c <- roc_curve(enet_model, group)
## Warning in geom_line(aes(x = `1-specificity`, y = sensitivity, by = name, : Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["enet_model_ra"]][[model_name]] <- enet_model$model_summary

supplements_models[["roc_cs"]][["enet_model_ra"]][[model_name]] <- enet_model$kfold_rocobjs

# see the results
enet_model$model_summary %>% t()
##                                      [,1]
## alpha                           0.8000000
## lambda                          0.1166804
## auc                             0.9059353
## auc_czech                       0.9059353
## auc_no                                NaN
## auc_optimism_corrected          0.7806661
## auc_optimism_corrected_CIL      0.5468750
## auc_optimism_corrected_CIU      0.8714540
## accuracy                        0.7841727
## accuracy_czech                        NaN
## accuracy_no                           NaN
## accuracy_optimism_corrected     0.7021714
## accuracy_optimism_corrected_CIL 0.3905918
## accuracy_optimism_corrected_CIU 0.8397496
roc_c

kNN
ASV level
level="ASV"

pre_ltx vs healthy

group <- c("pre_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_asv_tab,
                                     ileum_taxa_tab,
                                     ileum_metadata,
                                     group, usage="ml_ra")
## Removing 1478 ASV(s)
## Removing 118 ASV(s)
# fit the model
knn_model <- knn_binomial(filt_ileum_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                              reuse=TRUE,
                              file=model_name,
                              Q="Q1_czech")

# ROC curve
roc_c <- roc_curve(knn_model, group)
## Warning in geom_line(aes(x = `1-specificity`, y = sensitivity, by = name, : Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["knn_model_ra"]][[model_name]] <- knn_model$model_summary

supplements_models[["roc_cs"]][["knn_model_ra"]][[model_name]] <- knn_model$kfold_rocobjs

# see the results
knn_model$model_summary %>% t()
##                                       [,1]
## k                               11.0000000
## auc                              0.9110360
## auc_optimism_corrected           0.6990118
## auc_optimism_corrected_CIL       0.5201451
## auc_optimism_corrected_CIU       0.8744488
## accuracy                         0.6229508
## accuracy_optimism_corrected      0.5953868
## accuracy_optimism_corrected_CIL  0.4885093
## accuracy_optimism_corrected_CIU  0.7069805
roc_c

pre_ltx vs post_ltx

group <- c("pre_ltx","post_ltx")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_asv_tab,
                                     ileum_taxa_tab,
                                     ileum_metadata,
                                     group, usage="ml_ra")
## Removing 555 ASV(s)
## Removing 36 ASV(s)
# fit the model
knn_model <- knn_binomial(filt_ileum_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                              reuse=TRUE,
                              file=model_name,
                              Q="Q1_czech")

# ROC curve
roc_c <- roc_curve(knn_model, group)
## Warning in geom_line(aes(x = `1-specificity`, y = sensitivity, by = name, : Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["knn_model_ra"]][[model_name]] <- knn_model$model_summary

supplements_models[["roc_cs"]][["knn_model_ra"]][[model_name]] <- knn_model$kfold_rocobjs

# see the results
knn_model$model_summary %>% t()
##                                       [,1]
## k                               10.0000000
## auc                              0.8588644
## auc_optimism_corrected           0.6665429
## auc_optimism_corrected_CIL       0.5741466
## auc_optimism_corrected_CIU       0.8298764
## accuracy                         0.8095238
## accuracy_optimism_corrected      0.7956747
## accuracy_optimism_corrected_CIL  0.7117021
## accuracy_optimism_corrected_CIU  0.8823266
roc_c

post_ltx vs healthy

group <- c("post_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_asv_tab,
                                     ileum_taxa_tab,
                                     ileum_metadata,
                                     group, usage="ml_ra")
## Removing 260 ASV(s)
## Removing 48 ASV(s)
# fit the model
knn_model <- knn_binomial(filt_ileum_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                              reuse=TRUE,
                              file=model_name,
                              Q="Q1_czech")

# ROC curve
roc_c <- roc_curve(knn_model, group)
## Warning in geom_line(aes(x = `1-specificity`, y = sensitivity, by = name, : Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["knn_model_ra"]][[model_name]] <- knn_model$model_summary

supplements_models[["roc_cs"]][["knn_model_ra"]][[model_name]] <- knn_model$kfold_rocobjs

# see the results
knn_model$model_summary %>% t()
##                                       [,1]
## k                               15.0000000
## auc                              0.8180975
## auc_optimism_corrected           0.7040794
## auc_optimism_corrected_CIL       0.6010464
## auc_optimism_corrected_CIU       0.7911414
## accuracy                         0.8129496
## accuracy_optimism_corrected      0.7108628
## accuracy_optimism_corrected_CIL  0.6023514
## accuracy_optimism_corrected_CIU  0.7983019
roc_c

Genus level
level="genus"

Aggregate taxa

genus_data <- aggregate_taxa(ileum_asv_tab,
                             ileum_taxa_tab,
                             taxonomic_level = level)

ileum_genus_tab <- genus_data[[1]]
ileum_genus_taxa_tab <- genus_data[[2]]

pre_ltx vs healthy

group <- c("pre_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_genus_tab,
                                     ileum_genus_taxa_tab,
                                     ileum_metadata,
                                     group, 
                                     usage="ml_ra")
## Removing 93 ASV(s)
## Removing 8 ASV(s)
# fit the model
knn_model <- knn_binomial(filt_ileum_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                              reuse=TRUE,
                              file=model_name,
                              Q="Q1_czech")

# ROC curve
roc_c <- roc_curve(knn_model, group)
## Warning in geom_line(aes(x = `1-specificity`, y = sensitivity, by = name, : Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["knn_model_ra"]][[model_name]] <- knn_model$model_summary

supplements_models[["roc_cs"]][["knn_model_ra"]][[model_name]] <- knn_model$kfold_rocobjs

# see the results
knn_model$model_summary %>% t()
##                                       [,1]
## k                               10.0000000
## auc                              0.8023649
## auc_optimism_corrected           0.6724603
## auc_optimism_corrected_CIL       0.4564286
## auc_optimism_corrected_CIU       0.9615532
## accuracy                         0.6721311
## accuracy_optimism_corrected      0.6505327
## accuracy_optimism_corrected_CIL  0.5652174
## accuracy_optimism_corrected_CIU  0.7910173
roc_c

pre_ltx vs post_ltx

group <- c("pre_ltx","post_ltx")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_genus_tab,
                                     ileum_genus_taxa_tab,
                                     ileum_metadata,
                                     group, 
                                     usage="ml_ra")
## Removing 41 ASV(s)
## Removing 3 ASV(s)
# fit the model
knn_model <- knn_binomial(filt_ileum_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                              reuse=TRUE,
                              file=model_name,
                              Q="Q1_czech")

# ROC curve
roc_c <- roc_curve(knn_model, group)
## Warning in geom_line(aes(x = `1-specificity`, y = sensitivity, by = name, : Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["knn_model_ra"]][[model_name]] <- knn_model$model_summary

supplements_models[["roc_cs"]][["knn_model_ra"]][[model_name]] <- knn_model$kfold_rocobjs

# see the results
knn_model$model_summary %>% t()
##                                       [,1]
## k                               30.0000000
## auc                              0.7640931
## auc_optimism_corrected           0.5967148
## auc_optimism_corrected_CIL       0.4683710
## auc_optimism_corrected_CIU       0.6899451
## accuracy                         0.8095238
## accuracy_optimism_corrected      0.7987181
## accuracy_optimism_corrected_CIL  0.7281915
## accuracy_optimism_corrected_CIU  0.8823266
roc_c

post_ltx vs healthy

group <- c("post_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_genus_tab,
                                     ileum_genus_taxa_tab,
                                     ileum_metadata,
                                     group, 
                                     usage="ml_ra")
## Removing 16 ASV(s)
## Removing 2 ASV(s)
# fit the model
knn_model <- knn_binomial(filt_ileum_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                              reuse=TRUE,
                              file=model_name,
                              Q="Q1_czech")

# ROC curve
roc_c <- roc_curve(knn_model, group)
## Warning in geom_line(aes(x = `1-specificity`, y = sensitivity, by = name, : Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["knn_model_ra"]][[model_name]] <- knn_model$model_summary

supplements_models[["roc_cs"]][["knn_model_ra"]][[model_name]] <- knn_model$kfold_rocobjs

# see the results
knn_model$model_summary %>% t()
##                                       [,1]
## k                               16.0000000
## auc                              0.8449921
## auc_optimism_corrected           0.7102435
## auc_optimism_corrected_CIL       0.5709124
## auc_optimism_corrected_CIU       0.7964627
## accuracy                         0.7985612
## accuracy_optimism_corrected      0.7138748
## accuracy_optimism_corrected_CIL  0.6218794
## accuracy_optimism_corrected_CIU  0.8138019
roc_c

Random Forest
ASV level
level="ASV"

pre_ltx vs healthy

group <- c("pre_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_asv_tab,
                                     ileum_taxa_tab,
                                     ileum_metadata,
                                     group, usage="ml_ra")
## Removing 1478 ASV(s)
## Removing 118 ASV(s)
# fit the model
rf_model <- rf_binomial(filt_ileum_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                              reuse=TRUE,
                              file=model_name,
                              Q="Q1_czech")

# ROC curve
roc_c <- roc_curve(rf_model, group)
## Warning in geom_line(aes(x = `1-specificity`, y = sensitivity, by = name, : Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["rf_model_ra"]][[model_name]] <- rf_model$model_summary

supplements_models[["roc_cs"]][["rf_model_ra"]][[model_name]] <- rf_model$kfold_rocobjs

# see the results
rf_model$model_summary %>% t()
##                                 [,1]       
## mtry                            "105"      
## splitrule                       "gini"     
## min.node.size                   "2"        
## auc                             "1"        
## auc_optimism_corrected          "0.9169639"
## auc_optimism_corrected_CIL      "0.7780303"
## auc_optimism_corrected_CIU      "0.9842147"
## accuracy                        "1"        
## accuracy_optimism_corrected     "0.8188161"
## accuracy_optimism_corrected_CIL "0.6602717"
## accuracy_optimism_corrected_CIU "0.9047619"
roc_c

pre_ltx vs post_ltx

group <- c("pre_ltx","post_ltx")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_asv_tab,
                                     ileum_taxa_tab,
                                     ileum_metadata,
                                     group, usage="ml_ra")
## Removing 555 ASV(s)
## Removing 36 ASV(s)
# fit the model
rf_model <- rf_binomial(filt_ileum_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                              reuse=TRUE,
                              file=model_name,
                              Q="Q1_czech")

# ROC curve
roc_c <- roc_curve(rf_model, group)
## Warning in geom_line(aes(x = `1-specificity`, y = sensitivity, by = name, : Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["rf_model_ra"]][[model_name]] <- rf_model$model_summary

supplements_models[["roc_cs"]][["rf_model_ra"]][[model_name]] <- rf_model$kfold_rocobjs


# see the results
rf_model$model_summary %>% t()
##                                 [,1]       
## mtry                            "53"       
## splitrule                       "gini"     
## min.node.size                   "2"        
## auc                             "1"        
## auc_optimism_corrected          "0.7942196"
## auc_optimism_corrected_CIL      "0.6562306"
## auc_optimism_corrected_CIU      "0.8874175"
## accuracy                        "1"        
## accuracy_optimism_corrected     "0.7986889"
## accuracy_optimism_corrected_CIL "0.7250614"
## accuracy_optimism_corrected_CIU "0.8673445"
roc_c

post_ltx vs healthy

group <- c("post_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_asv_tab,
                                     ileum_taxa_tab,
                                     ileum_metadata,
                                     group, usage="ml_ra")
## Removing 260 ASV(s)
## Removing 48 ASV(s)
# fit the model
rf_model <- rf_binomial(filt_ileum_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                              reuse=TRUE,
                              file=model_name,
                              Q="Q1_czech")

# ROC curve
roc_c <- roc_curve(rf_model, group)
## Warning in geom_line(aes(x = `1-specificity`, y = sensitivity, by = name, : Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["rf_model_ra"]][[model_name]] <- rf_model$model_summary

supplements_models[["roc_cs"]][["rf_model_ra"]][[model_name]] <- rf_model$kfold_rocobjs


# see the results
rf_model$model_summary %>% t()
##                                 [,1]       
## mtry                            "71"       
## splitrule                       "gini"     
## min.node.size                   "2"        
## auc                             "1"        
## auc_optimism_corrected          "0.9228307"
## auc_optimism_corrected_CIL      "0.8516193"
## auc_optimism_corrected_CIU      "0.9663803"
## accuracy                        "1"        
## accuracy_optimism_corrected     "0.8354697"
## accuracy_optimism_corrected_CIL "0.8078764"
## accuracy_optimism_corrected_CIU "0.8776488"
roc_c

Genus level
level="genus"

Aggregate taxa

genus_data <- aggregate_taxa(ileum_asv_tab,
                             ileum_taxa_tab,
                             taxonomic_level = level)

ileum_genus_tab <- genus_data[[1]]
ileum_genus_taxa_tab <- genus_data[[2]]

pre_ltx vs healthy

group <- c("pre_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_genus_tab,
                                     ileum_genus_taxa_tab,
                                     ileum_metadata,
                                     group, 
                                     usage="ml_ra")
## Removing 93 ASV(s)
## Removing 8 ASV(s)
# fit the model
rf_model <- rf_binomial(filt_ileum_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                              reuse=TRUE,
                              file=model_name,
                              Q="Q1_czech")

# ROC curve
roc_c <- roc_curve(rf_model, group)
## Warning in geom_line(aes(x = `1-specificity`, y = sensitivity, by = name, : Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["rf_model_ra"]][[model_name]] <- rf_model$model_summary

supplements_models[["roc_cs"]][["rf_model_ra"]][[model_name]] <- rf_model$kfold_rocobjs


# see the results
rf_model$model_summary %>% t()
##                                 [,1]       
## mtry                            "93"       
## splitrule                       "gini"     
## min.node.size                   "2"        
## auc                             "1"        
## auc_optimism_corrected          "0.9215987"
## auc_optimism_corrected_CIL      "0.8507365"
## auc_optimism_corrected_CIU      "0.9755298"
## accuracy                        "1"        
## accuracy_optimism_corrected     "0.8093709"
## accuracy_optimism_corrected_CIL "0.7078804"
## accuracy_optimism_corrected_CIU "0.8724432"
roc_c

pre_ltx vs post_ltx

group <- c("pre_ltx","post_ltx")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_genus_tab,
                                     ileum_genus_taxa_tab,
                                     ileum_metadata,
                                     group, 
                                     usage="ml_ra")
## Removing 41 ASV(s)
## Removing 3 ASV(s)
# fit the model
rf_model <- rf_binomial(filt_ileum_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                              reuse=TRUE,
                              file=model_name,
                              Q="Q1_czech")

# ROC curve
roc_c <- roc_curve(rf_model, group)
## Warning in geom_line(aes(x = `1-specificity`, y = sensitivity, by = name, : Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["rf_model_ra"]][[model_name]] <- rf_model$model_summary

supplements_models[["roc_cs"]][["rf_model_ra"]][[model_name]] <- rf_model$kfold_rocobjs


# see the results
rf_model$model_summary %>% t()
##                                 [,1]       
## mtry                            "29"       
## splitrule                       "gini"     
## min.node.size                   "2"        
## auc                             "1"        
## auc_optimism_corrected          "0.7494602"
## auc_optimism_corrected_CIL      "0.6336292"
## auc_optimism_corrected_CIU      "0.8452175"
## accuracy                        "1"        
## accuracy_optimism_corrected     "0.7778113"
## accuracy_optimism_corrected_CIL "0.7247008"
## accuracy_optimism_corrected_CIU "0.8673445"
roc_c

post_ltx vs healthy

group <- c("post_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_genus_tab,
                                     ileum_genus_taxa_tab,
                                     ileum_metadata,
                                     group, 
                                     usage="ml_ra")
## Removing 16 ASV(s)
## Removing 2 ASV(s)
# fit the model
rf_model <- rf_binomial(filt_ileum_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                              reuse=TRUE,
                              file=model_name,
                              Q="Q1_czech")

# ROC curve
roc_c <- roc_curve(rf_model, group)
## Warning in geom_line(aes(x = `1-specificity`, y = sensitivity, by = name, : Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["rf_model_ra"]][[model_name]] <- rf_model$model_summary

supplements_models[["roc_cs"]][["rf_model_ra"]][[model_name]] <- rf_model$kfold_rocobjs


# see the results
rf_model$model_summary %>% t()
##                                 [,1]       
## mtry                            "109"      
## splitrule                       "gini"     
## min.node.size                   "2"        
## auc                             "1"        
## auc_optimism_corrected          "0.8691973"
## auc_optimism_corrected_CIL      "0.7834489"
## auc_optimism_corrected_CIU      "0.9416474"
## accuracy                        "1"        
## accuracy_optimism_corrected     "0.8255819"
## accuracy_optimism_corrected_CIL "0.7641586"
## accuracy_optimism_corrected_CIU "0.9079108"
roc_c

Gradient boosting
ASV level
level="ASV"

pre_ltx vs healthy

group <- c("pre_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_asv_tab,
                                     ileum_taxa_tab,
                                     ileum_metadata,
                                     group, usage="ml_ra")
## Removing 1478 ASV(s)
## Removing 118 ASV(s)
# fit the model
gbm_model <- gbm_binomial(filt_ileum_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                              reuse=TRUE,
                              file=model_name,
                              Q="Q1_czech")

# ROC curve
roc_c <- roc_curve(gbm_model, group)
## Warning in geom_line(aes(x = `1-specificity`, y = sensitivity, by = name, : Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["gbm_model_ra"]][[model_name]] <- gbm_model$model_summary

supplements_models[["roc_cs"]][["gbm_model_ra"]][[model_name]] <- gbm_model$kfold_rocobjs

# see the results
gbm_model$model_summary %>% t()
##                                 [,1]       
## mtry                            "109"      
## splitrule                       "gini"     
## min.node.size                   "2"        
## auc                             "1"        
## auc_optimism_corrected          "0.8691973"
## auc_optimism_corrected_CIL      "0.7834489"
## auc_optimism_corrected_CIU      "0.9416474"
## accuracy                        "1"        
## accuracy_optimism_corrected     "0.8255819"
## accuracy_optimism_corrected_CIL "0.7641586"
## accuracy_optimism_corrected_CIU "0.9079108"
roc_c

pre_ltx vs post_ltx

group <- c("pre_ltx","post_ltx")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_asv_tab,
                                     ileum_taxa_tab,
                                     ileum_metadata,
                                     group, usage="ml_ra")
## Removing 555 ASV(s)
## Removing 36 ASV(s)
# fit the model
gbm_model <- gbm_binomial(filt_ileum_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                              reuse=TRUE,
                              file=model_name,
                              Q="Q1_czech")

# ROC curve
roc_c <- roc_curve(gbm_model, group)
## Warning in geom_line(aes(x = `1-specificity`, y = sensitivity, by = name, : Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["gbm_model_ra"]][[model_name]] <- gbm_model$model_summary

supplements_models[["roc_cs"]][["gbm_model_ra"]][[model_name]] <- gbm_model$kfold_rocobjs

# see the results
gbm_model$model_summary %>% t()
##                                 [,1]       
## mtry                            "109"      
## splitrule                       "gini"     
## min.node.size                   "2"        
## auc                             "1"        
## auc_optimism_corrected          "0.8691973"
## auc_optimism_corrected_CIL      "0.7834489"
## auc_optimism_corrected_CIU      "0.9416474"
## accuracy                        "1"        
## accuracy_optimism_corrected     "0.8255819"
## accuracy_optimism_corrected_CIL "0.7641586"
## accuracy_optimism_corrected_CIU "0.9079108"
roc_c

post_ltx vs healthy

group <- c("post_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_asv_tab,
                                     ileum_taxa_tab,
                                     ileum_metadata,
                                     group, usage="ml_ra")
## Removing 260 ASV(s)
## Removing 48 ASV(s)
# fit the model
gbm_model <- gbm_binomial(filt_ileum_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                              reuse=TRUE,
                              file=model_name,
                              Q="Q1_czech")

# ROC curve
roc_c <- roc_curve(gbm_model, group)
## Warning in geom_line(aes(x = `1-specificity`, y = sensitivity, by = name, : Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["gbm_model_ra"]][[model_name]] <- gbm_model$model_summary

supplements_models[["roc_cs"]][["gbm_model_ra"]][[model_name]] <- gbm_model$kfold_rocobjs

# see the results
gbm_model$model_summary %>% t()
##                                 [,1]       
## mtry                            "109"      
## splitrule                       "gini"     
## min.node.size                   "2"        
## auc                             "1"        
## auc_optimism_corrected          "0.8691973"
## auc_optimism_corrected_CIL      "0.7834489"
## auc_optimism_corrected_CIU      "0.9416474"
## accuracy                        "1"        
## accuracy_optimism_corrected     "0.8255819"
## accuracy_optimism_corrected_CIL "0.7641586"
## accuracy_optimism_corrected_CIU "0.9079108"
roc_c

Genus level
level="genus"

Aggregate taxa

genus_data <- aggregate_taxa(ileum_asv_tab,
                             ileum_taxa_tab,
                             taxonomic_level = level)

ileum_genus_tab <- genus_data[[1]]
ileum_genus_taxa_tab <- genus_data[[2]]

pre_ltx vs healthy

group <- c("pre_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_genus_tab,
                                     ileum_genus_taxa_tab,
                                     ileum_metadata,
                                     group, 
                                     usage="ml_ra")
## Removing 93 ASV(s)
## Removing 8 ASV(s)
# fit the model
gbm_model <- gbm_binomial(filt_ileum_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                              reuse=TRUE,
                              file=model_name,
                              Q="Q1_czech")

# ROC curve
roc_c <- roc_curve(gbm_model, group)
## Warning in geom_line(aes(x = `1-specificity`, y = sensitivity, by = name, : Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["gbm_model_ra"]][[model_name]] <- gbm_model$model_summary

supplements_models[["roc_cs"]][["gbm_model_ra"]][[model_name]] <- gbm_model$kfold_rocobjs

# see the results
gbm_model$model_summary %>% t()
##                                 [,1]       
## mtry                            "109"      
## splitrule                       "gini"     
## min.node.size                   "2"        
## auc                             "1"        
## auc_optimism_corrected          "0.8691973"
## auc_optimism_corrected_CIL      "0.7834489"
## auc_optimism_corrected_CIU      "0.9416474"
## accuracy                        "1"        
## accuracy_optimism_corrected     "0.8255819"
## accuracy_optimism_corrected_CIL "0.7641586"
## accuracy_optimism_corrected_CIU "0.9079108"
roc_c

pre_ltx vs post_ltx

group <- c("pre_ltx","post_ltx")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_genus_tab,
                                     ileum_genus_taxa_tab,
                                     ileum_metadata,
                                     group, 
                                     usage="ml_ra")
## Removing 41 ASV(s)
## Removing 3 ASV(s)
# fit the model
gbm_model <- gbm_binomial(filt_ileum_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                              reuse=TRUE,
                              file=model_name,
                              Q="Q1_czech")

# ROC curve
roc_c <- roc_curve(gbm_model, group)
## Warning in geom_line(aes(x = `1-specificity`, y = sensitivity, by = name, : Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["gbm_model_ra"]][[model_name]] <- gbm_model$model_summary

supplements_models[["roc_cs"]][["gbm_model_ra"]][[model_name]] <- gbm_model$kfold_rocobjs

# see the results
gbm_model$model_summary %>% t()
##                                 [,1]       
## mtry                            "109"      
## splitrule                       "gini"     
## min.node.size                   "2"        
## auc                             "1"        
## auc_optimism_corrected          "0.8691973"
## auc_optimism_corrected_CIL      "0.7834489"
## auc_optimism_corrected_CIU      "0.9416474"
## accuracy                        "1"        
## accuracy_optimism_corrected     "0.8255819"
## accuracy_optimism_corrected_CIL "0.7641586"
## accuracy_optimism_corrected_CIU "0.9079108"
roc_c

post_ltx vs healthy

group <- c("post_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_genus_tab,
                                     ileum_genus_taxa_tab,
                                     ileum_metadata,
                                     group, 
                                     usage="ml_ra")
## Removing 16 ASV(s)
## Removing 2 ASV(s)
# fit the model
gbm_model <- gbm_binomial(filt_ileum_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                              reuse=TRUE,
                              file=model_name,
                              Q="Q1_czech")

# ROC curve
roc_c <- roc_curve(gbm_model, group)
## Warning in geom_line(aes(x = `1-specificity`, y = sensitivity, by = name, : Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["gbm_model_ra"]][[model_name]] <- gbm_model$model_summary

supplements_models[["roc_cs"]][["gbm_model_ra"]][[model_name]] <- gbm_model$kfold_rocobjs

# see the results
gbm_model$model_summary %>% t()
##                                 [,1]       
## mtry                            "109"      
## splitrule                       "gini"     
## min.node.size                   "2"        
## auc                             "1"        
## auc_optimism_corrected          "0.8691973"
## auc_optimism_corrected_CIL      "0.7834489"
## auc_optimism_corrected_CIU      "0.9416474"
## accuracy                        "1"        
## accuracy_optimism_corrected     "0.8255819"
## accuracy_optimism_corrected_CIL "0.7641586"
## accuracy_optimism_corrected_CIU "0.9079108"
roc_c

Saving results

models_list <- list()

for (model_name in names(supplements_models$models_summ)){
  df <- do.call(rbind, supplements_models$models_summ[[model_name]])
  models_list[[model_name]] <- df
}

write.xlsx(models_list,
           file=file.path(path,paste0("supplements_models_",segment,".xlsx")),
           rowNames=TRUE)

Results overview

Alpha diversity

pc_observed[[segment]]
pc_shannon[[segment]]
pc_simpson[[segment]]
pc_pielou[[segment]]
alpha_div_plots[[paste(segment,"Custom")]]

Beta diversity

Main results

pairwise_aitchison_raw[[paste("genus", segment)]]

PCA

pca_plots_list[[paste(segment,"genus custom")]]

Supplements

knitr::kable(supplements_beta[!grepl("PCoA",names(supplements_beta))],
             digits = 3,
             caption = "Supplementary PERMANOVA results")
Supplementary PERMANOVA results
pairs Df SumsOfSqs F.Model R2 p.value p.adjusted sig
pre_ltx vs healthy 1 0.412 2.198 0.036 0.008 0.012 *
pre_ltx vs post_ltx 1 0.230 1.052 0.008 0.350 0.350
post_ltx vs healthy 1 0.771 3.752 0.027 0.001 0.003 **
pairs Df SumsOfSqs F.Model R2 p.value p.adjusted sig
pre_ltx vs healthy 1 0.499 1.835 0.030 0.005 0.007 **
pre_ltx vs post_ltx 1 0.333 1.101 0.009 0.262 0.262
post_ltx vs healthy 1 0.865 2.980 0.021 0.001 0.003 **
pairs Df SumsOfSqs F.Model R2 p.value p.adjusted sig
pre_ltx vs healthy 1 484.192 1.552 0.026 0.002 0.003 **
pre_ltx vs post_ltx 1 320.395 1.130 0.009 0.121 0.121
post_ltx vs healthy 1 877.994 2.987 0.021 0.001 0.003 **
pairs Df SumsOfSqs F.Model R2 p.value p.adjusted sig
pre_ltx vs healthy 1 0.566 1.748 0.029 0.007 0.011 *
pre_ltx vs post_ltx 1 0.437 1.284 0.010 0.113 0.113
post_ltx vs healthy 1 1.212 3.689 0.026 0.001 0.003 **
pairs Df SumsOfSqs F.Model R2 p.value p.adjusted sig
pre_ltx vs healthy 1 0.558 1.423 0.024 0.008 0.012 *
pre_ltx vs post_ltx 1 0.474 1.174 0.009 0.102 0.102
post_ltx vs healthy 1 1.027 2.597 0.019 0.001 0.003 **

PCA

ggarrange(plotlist = supplements_beta[grepl("PCoA",names(supplements_beta))],
          labels=names(supplements_beta[grepl("PCoA",names(supplements_beta))]),
          font.label = list(size=5,face="plain"),
          ncol=2,nrow=3)

Univariate analysis

Number of significant taxa

knitr::kable(cbind(as.data.frame(lapply(list_intersections,nrow)),
      as.data.frame(lapply(psc_effect,nrow))) %>% t() %>% 
  `colnames<-`("Count") %>% 
  `rownames<-`(c(names(list_intersections),"PSC effect ASV","PSC effect Genus","PSC effect Phylum")),caption="Number of significant taxa")
Number of significant taxa
Count
terminal_ileum genus healthy vs pre_ltx 23
terminal_ileum genus pre_ltx vs post_ltx 6
terminal_ileum genus healthy vs post_ltx 31
terminal_ileum ASV healthy vs pre_ltx 27
terminal_ileum ASV pre_ltx vs post_ltx 3
terminal_ileum ASV healthy vs post_ltx 76
terminal_ileum phylum healthy vs pre_ltx 1
terminal_ileum phylum pre_ltx vs post_ltx 0
terminal_ileum phylum healthy vs post_ltx 2
PSC effect ASV 12
PSC effect Genus 18
PSC effect Phylum 0

Machine learning

Main models

Summary

knitr::kable(models_summ_df_ileum %>% dplyr::select(
"alpha","lambda",
"auc_optimism_corrected",
"auc_optimism_corrected_CIL",
"auc_optimism_corrected_CIU"),
             digits=2,caption="Elastic net results")
Elastic net results
alpha lambda auc_optimism_corrected auc_optimism_corrected_CIL auc_optimism_corrected_CIU
pre_ltx vs healthy ASV terminal_ileum 0.0 4.43 0.91 0.75 0.97
pre_ltx vs post_ltx ASV terminal_ileum 0.0 23.62 0.69 0.58 0.81
post_ltx vs healthy ASV terminal_ileum 0.4 0.04 0.94 0.90 0.99
pre_ltx vs healthy genus terminal_ileum 0.2 0.16 0.91 0.85 1.00
pre_ltx vs post_ltx genus terminal_ileum 0.0 4.87 0.71 0.63 0.83
post_ltx vs healthy genus terminal_ileum 0.4 0.03 0.95 0.92 0.99

ROC - ASV level

roc_curve_all_custom(roc_cs[c(1:3)], 
                     Q="Q1_czech",
                     model_name="enet_model")
## [1] "pre_ltx vs healthy ASV terminal_ileum"  "pre_ltx vs post_ltx ASV terminal_ileum"
## [3] "post_ltx vs healthy ASV terminal_ileum"
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values

ROC - Genus level

roc_curve_all_custom(roc_cs[c(4:6)],Q="Q1_czech",
                     model_name="enet_model")
## [1] "pre_ltx vs healthy genus terminal_ileum"  "pre_ltx vs post_ltx genus terminal_ileum"
## [3] "post_ltx vs healthy genus terminal_ileum"
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values

Supplementary models

Summary

# Build final dataframe
models_list[["enet_model"]] <- models_summ_df_ileum
final_df <- tibble(row_names = rownames(models_list[[1]]))

# Loop through models and extract required values
for (model_name in names(models_list)) {
  model_df <- models_list[[model_name]]
  
  # Combine AUC_optimism_corrected with its CI values
  final_df[[model_name]] <- paste0(
    round(model_df$auc_optimism_corrected, 3), 
    " (", round(model_df$auc_optimism_corrected_CIL, 3), "; ", 
    round(model_df$auc_optimism_corrected_CIU, 3), ")"
  )
}

knitr::kable(final_df, caption="All models")
All models
row_names knn_model rf_model gbm_model enet_model_ra knn_model_ra rf_model_ra gbm_model_ra enet_model
pre_ltx vs healthy ASV terminal_ileum 0.911 (0.846; 0.967) 0.829 (0.577; 0.984) 0.853 (0.72; 0.927) 0.845 (0.706; 0.944) 0.699 (0.52; 0.874) 0.917 (0.778; 0.984) 0.869 (0.783; 0.942) 0.913 (0.746; 0.972)
pre_ltx vs post_ltx ASV terminal_ileum 0.677 (0.585; 0.787) 0.656 (0.578; 0.735) 0.853 (0.72; 0.927) 0.565 (0.372; 0.822) 0.667 (0.574; 0.83) 0.794 (0.656; 0.887) 0.869 (0.783; 0.942) 0.695 (0.582; 0.811)
post_ltx vs healthy ASV terminal_ileum 0.818 (0.705; 0.952) 0.865 (0.747; 0.948) 0.853 (0.72; 0.927) 0.795 (0.708; 0.869) 0.704 (0.601; 0.791) 0.923 (0.852; 0.966) 0.869 (0.783; 0.942) 0.942 (0.901; 0.986)
pre_ltx vs healthy genus terminal_ileum 0.858 (0.791; 0.971) 0.882 (0.769; 0.963) 0.853 (0.72; 0.927) 0.851 (0.744; 0.983) 0.672 (0.456; 0.962) 0.922 (0.851; 0.976) 0.869 (0.783; 0.942) 0.913 (0.849; 0.996)
pre_ltx vs post_ltx genus terminal_ileum 0.693 (0.559; 0.805) 0.708 (0.503; 0.863) 0.853 (0.72; 0.927) 0.608 (0.492; 0.74) 0.597 (0.468; 0.69) 0.749 (0.634; 0.845) 0.869 (0.783; 0.942) 0.709 (0.627; 0.834)
post_ltx vs healthy genus terminal_ileum 0.892 (0.841; 0.962) 0.853 (0.72; 0.927) 0.853 (0.72; 0.927) 0.781 (0.547; 0.871) 0.71 (0.571; 0.796) 0.869 (0.783; 0.942) 0.869 (0.783; 0.942) 0.955 (0.924; 0.993)

ROC - ASV

rocs_list <- supplements_models$roc_cs
rocs_list[["enet_model"]] <- roc_cs

plot_list <- list()

for (model_name in names(rocs_list)) {
  plot_list[[model_name]] <- roc_curve_all_custom(rocs_list[[model_name]][c(1:3)],
                       Q="Q1_czech",
                       model_name=model_name)
}

ggarrange(plotlist = plot_list,labels = names(rocs_list),font.label = list(face="plain",size=7))

ROC - genus

plot_list <- list()

for (model_name in names(rocs_list)) {
  plot_list[[model_name]] <- roc_curve_all_custom(rocs_list[[model_name]][c(4:6)],
                       Q="Q1_czech",
                       model_name=model_name)
}

ggarrange(plotlist = plot_list,labels = names(rocs_list),font.label = list(face="plain",size=7))

Analysis - Colon

segment="colon"

Filtering

Rules: - prevalence > 5% (per group) - nearZeroVar with default settings - sequencing depth > 5000 - taxonomic assignment at least order

Library size

read_counts(colon_asv_tab, line = c(5000,10000))
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.

Sequencing depth

data_filt <- seq_depth_filtering(colon_asv_tab,
                                 colon_taxa_tab,
                                 colon_metadata,
                                 seq_depth_threshold = 10000)
## Removing 12 ASV(s)
filt_colon_asv_tab <- data_filt[[1]]; alpha_colon_asv_tab <- filt_colon_asv_tab
filt_colon_taxa_tab <- data_filt[[2]]; alpha_colon_taxa_tab <- filt_colon_taxa_tab
filt_colon_metadata <- data_filt[[3]]; alpha_colon_metadata <- filt_colon_metadata

seq_step <- dim(filt_colon_asv_tab)[1]

Library size

read_counts(filt_colon_asv_tab,line = c(10000))
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.

NearZeroVar

data_filt <- nearzerovar_filtering(filt_colon_asv_tab,
                                   filt_colon_taxa_tab,
                                   filt_colon_metadata)

filt_colon_asv_tab <- data_filt[[1]]
filt_colon_taxa_tab <- data_filt[[2]]
nearzero_step <- dim(filt_colon_asv_tab)[1]

Library size

read_counts(filt_colon_asv_tab,line = c(5000,10000))
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.

Check zero depth

data_filt <- check_zero_depth(filt_colon_asv_tab, 
                              filt_colon_taxa_tab, 
                              filt_colon_metadata)

filt_colon_asv_tab <- data_filt[[1]]; 
filt_colon_taxa_tab <- data_filt[[2]]; 
filt_colon_metadata <- data_filt[[3]]; 

Library size

read_counts(filt_colon_asv_tab,line = c(5000,10000))
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.

Final Counts

final_counts_filtering(colon_asv_tab,
                       filt_colon_asv_tab,
                       filt_colon_metadata,
                       seq_step, 0, nearzero_step)

Alpha diversity

path = "../results/Q1_czech/alpha_diversity"

Calculation

# Construct MPSE object
alpha_colon_metadata$Sample <- alpha_colon_metadata$SampleID
colon_mpse <- as.MPSE(construct_phyloseq(alpha_colon_asv_tab,
                                         alpha_colon_taxa_tab,
                                         alpha_colon_metadata))

colon_mpse %<>% mp_rrarefy(raresize = 10000,seed = 123)

# Calculate alpha diversity - rarefied counts
colon_mpse %<>% mp_cal_alpha(.abundance=RareAbundance, force=TRUE)
alpha_data <- data.frame(SampleID=colon_mpse$Sample.x,
                         Observe=colon_mpse$Observe,
                         Shannon=colon_mpse$Shannon,
                         Simpson=colon_mpse$Simpson,
                         Pielou=colon_mpse$Pielou,
                         Group=colon_mpse$Group,
                         Country=colon_mpse$Country,
                         Patient=colon_mpse$Patient)

write.csv(alpha_data,file.path(path,paste0("alpha_indices_",segment,".csv")),
          row.names = FALSE)

Plots

Custom plot

alpha_data <- alpha_data %>% 
  dplyr::select(-c("Simpson","Pielou")) %>%
  mutate(Richness=Observe)

p_B <- alpha_diversity_custom_2(alpha_data,
                                size = 1.5,
                                width = 0.3)
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.
# save the results
alpha_div_plots[[paste(segment,"Custom")]] <- p_B

p_B

Linear Model

path = "../results/Q1_czech/alpha_diversity"
alpha_data <- read.csv(file.path(path,paste0("alpha_indices_",segment,".csv")))

Richness

results_model <- pairwise.lmer(
  formula = "Observe ~ Group + (1|Patient)",
  factors=alpha_data$Group,
  data=alpha_data)

# check interaction
if (!is.data.frame(results_model)){
  results_model_observe <- results_model[[1]]
  results_model_observe_detailed <- results_model[[2]]
} else {
  results_model_observe <- results_model
  results_model_observe_detailed <- NA
}

# save the results
pc_observed[[segment]] <- results_model_observe
# see the results
knitr::kable(results_model_observe,digits = 3,
caption = "Raw results of linear model of richness estimation.")
Raw results of linear model of richness estimation.
Estimate Std..Error df t.value Pr…t.. p.adj sig
healthy vs Grouppre_ltx -56.074 12.083 77.355 -4.641 0.000 0.000 ***
pre_ltx vs Grouppost_ltx 28.874 12.150 133.073 2.377 0.019 0.019 *
healthy vs Grouppost_ltx -27.195 8.555 161.359 -3.179 0.002 0.003 **
# see the results
knitr::kable(results_model[[3]],digits = 3,
caption = "Group means")
Group means
Estimate Std. Error df t value Pr(>|t|)
healthy 171.741 6.999 162.573 24.537 0
pre_ltx 115.661 10.965 133.827 10.549 0
healthy 171.795 6.790 77.014 25.302 0

Shannon

results_model <- pairwise.lmer(
  formula = "Shannon ~ Group + (1|Patient)",
  factors=alpha_data$Group,
  data=alpha_data)

# check interaction
if (!is.data.frame(results_model)){
  results_model_shannon <- results_model[[1]]
  results_model_shannon_detailed <- results_model[[2]]
} else {
  results_model_shannon <- results_model
  results_model_shannon_detailed <- NA
}

# save the results
pc_shannon[[segment]] <- as.data.frame(results_model_shannon)
# see the results
knitr::kable(results_model_shannon,digits = 3,
caption = "Raw results of linear model of Shannon estimation.")
Raw results of linear model of Shannon estimation.
Estimate Std..Error df t.value Pr…t.. p.adj sig
healthy vs Grouppre_ltx -0.585 0.134 77.248 -4.359 0.000 0.00 ***
pre_ltx vs Grouppost_ltx 0.365 0.160 130.742 2.281 0.024 0.03 *
healthy vs Grouppost_ltx -0.220 0.101 158.220 -2.185 0.030 0.03 *
# see the results
knitr::kable(results_model[[3]],digits = 3,
caption = "Group means")
Group means
Estimate Std. Error df t value Pr(>|t|)
healthy 3.686 0.082 159.083 44.800 0
pre_ltx 3.101 0.144 131.102 21.518 0
healthy 3.686 0.075 76.839 48.853 0

Simpson

results_model <- pairwise.lmer(
  formula = "Simpson ~ Group + (1|Patient)",
  factors=alpha_data$Group,
  data=alpha_data)

# check interaction
if (!is.data.frame(results_model)){
  results_model_simpson <- results_model[[1]]
  results_model_simpson_detailed <- results_model[[2]]
} else {
  results_model_simpson <- results_model
  results_model_simpson_detailed <- NA
}

# save the results
pc_simpson[[segment]] <- as.data.frame(results_model_simpson)
# see the results
knitr::kable(results_model_simpson,digits = 3,
caption = "Raw results of linear model of Simpson estimation.")
Raw results of linear model of Simpson estimation.
Estimate Std..Error df t.value Pr…t.. p.adj sig
healthy vs Grouppre_ltx -0.059 0.016 78.192 -3.658 0.000 0.001 **
pre_ltx vs Grouppost_ltx 0.031 0.027 130.285 1.174 0.242 0.242
healthy vs Grouppost_ltx -0.027 0.017 157.776 -1.613 0.109 0.163
# see the results
knitr::kable(results_model[[3]],digits = 3,
caption = "Group means")
Group means
Estimate Std. Error df t value Pr(>|t|)
healthy 0.944 0.014 158.265 68.189 0
pre_ltx 0.885 0.024 130.534 36.671 0
healthy 0.944 0.009 77.739 104.354 0

Pielou

results_model <- pairwise.lmer(
  formula = "Pielou ~ Group + (1|Patient)",
  factors=alpha_data$Group,
  data=alpha_data)

# check interaction
if (!is.data.frame(results_model)){
  results_model_pielou <- results_model[[1]]
  results_model_pielou_detailed <- results_model[[2]]
} else {
  results_model_pielou <- results_model
  results_model_pielou_detailed <- NA
}

# save the results
pc_pielou[[segment]] <- as.data.frame(results_model_pielou)
# see the results
knitr::kable(results_model_pielou,digits = 3,
caption = "Raw results of linear model of Pielou estimation.")
Raw results of linear model of Pielou estimation.
Estimate Std..Error df t.value Pr…t.. p.adj sig
healthy vs Grouppre_ltx -0.057 0.018 77.904 -3.176 0.002 0.006 **
pre_ltx vs Grouppost_ltx 0.039 0.025 130.452 1.603 0.111 0.167
healthy vs Grouppost_ltx -0.018 0.016 157.415 -1.139 0.257 0.257
# see the results
knitr::kable(results_model[[3]],digits = 3,
caption = "Group means")
Group means
Estimate Std. Error df t value Pr(>|t|)
healthy 0.721 0.013 158.282 55.948 0
pre_ltx 0.663 0.022 130.843 29.919 0
healthy 0.721 0.010 77.363 71.018 0

Saving results

alpha_list <- list(
  Richness=pc_observed[[segment]] %>% rownames_to_column("Comparison"),
  Shannon=pc_shannon[[segment]] %>% rownames_to_column("Comparison"),
  Simpson=pc_simpson[[segment]] %>% rownames_to_column("Comparison"),
  Pielou=pc_pielou[[segment]] %>% rownames_to_column("Comparison"))
                   
write.xlsx(alpha_list, 
           file = file.path(path,paste0("alpha_diversity_results_",segment,".xlsx")))

Beta diversity

Calculating Aitchison distance (euclidean distance on clr-transformed data), both at ASV and genus level.

Main analysis - Genus, Aitchison

Genus level, Aitchison distance

level="genus"
path = "../results/Q1_czech/beta_diversity"

Aggregation, filtering

# Aggregation
genus_data <- aggregate_taxa(colon_asv_tab,
                             colon_taxa_tab,
                             taxonomic_level=level,
                             names=TRUE)
# Filtration
filt_data <- filtering_steps(genus_data[[1]],
                             genus_data[[2]],
                             colon_metadata,
                             seq_depth_threshold=10000)

filt_colon_genus_tab <- filt_data[[1]]
filt_colon_genus_taxa <- filt_data[[2]]
filt_colon_genus_metadata <- filt_data[[3]]
PERMANOVA
pairwise_df <- filt_colon_genus_tab %>% column_to_rownames("SeqID") %>% t()

# main effect
pp_main <- pairwise.adonis(pairwise_df,filt_colon_genus_metadata$Group,
                           covariate = NULL, 
                           patients = filt_colon_genus_metadata$Patient,
                           sim.method = "robust.aitchison", p.adjust.m="BH")


# tidy the results
pp_factor <- pp_main

cols <- c("pairs","Df","SumsOfSqs", "F.Model","R2","p.value", "p.adjusted", "sig")
colnames(pp_factor) <- cols

# save raw results
pairwise_aitchison_raw[[paste(level, segment)]] <- pp_factor
# see the results
knitr::kable(pp_factor,digits = 3,caption = "PERMANOVA, GROUP separation")
PERMANOVA, GROUP separation
pairs Df SumsOfSqs F.Model R2 p.value p.adjusted sig
pre_ltx vs healthy 1 370.737 2.136 0.016 0.217 0.217
pre_ltx vs post_ltx 1 394.216 2.228 0.009 0.103 0.154
post_ltx vs healthy 1 904.473 5.126 0.017 0.001 0.003 **
Plots

Custom

p <- pca_plot_custom(filt_colon_genus_tab,
                                 filt_colon_genus_taxa,
                                 filt_colon_genus_metadata,
                                 show_boxplots = TRUE,
                                 variable = "Group", size=3, show_legend=TRUE)

# save the results
pca_plots_list[[paste(segment,level,"custom")]] <- p

# see the results
p

Saving results

write.xlsx(pairwise_aitchison_raw[[paste(level, segment)]], 
           file = file.path(path,
           paste0("beta_diversity_results_", segment,".xlsx")))

Supplementary analysis

Genus level

level="genus"
Bray-Curtis

PERMANOVA

pairwise_df <- filt_colon_genus_tab %>% column_to_rownames("SeqID") %>% t()

# main effect
pp_main <- pairwise.adonis(pairwise_df,filt_colon_genus_metadata$Group,
                           covariate = NULL, 
                            patients = filt_colon_genus_metadata$Patient,
                           sim.method = "bray", p.adjust.m="BH")


# tidy the results
pp_factor <- pp_main

cols <- c("pairs","Df","SumsOfSqs", "F.Model","R2","p.value", "p.adjusted", "sig")
colnames(pp_factor) <- cols

# save raw results
supplements_beta[[paste("bray",level,segment)]] <- pp_factor
# see the results
knitr::kable(pp_factor,digits = 3,caption = "PERMANOVA, GROUP separation")
PERMANOVA, GROUP separation
pairs Df SumsOfSqs F.Model R2 p.value p.adjusted sig
pre_ltx vs healthy 1 0.668 3.662 0.026 0.053 0.053
pre_ltx vs post_ltx 1 0.700 3.489 0.014 0.014 0.021 *
post_ltx vs healthy 1 1.306 7.046 0.023 0.001 0.003 **

Plots

p <- pca_plot_custom(filt_colon_genus_tab,
                                 filt_colon_genus_taxa,
                                 filt_colon_genus_metadata,
                                 measure = "bray",
                                 show_boxplots = TRUE,
                                 variable = "Group", size=3, show_legend=TRUE)

# save the results
supplements_beta[[paste("PCoA bray",level,segment)]] <- p

# see the results
p

Jaccard

PERMANOVA

pairwise_df <- filt_colon_genus_tab %>% column_to_rownames("SeqID") %>% t()

# main effect
pp_main <- pairwise.adonis(pairwise_df,filt_colon_genus_metadata$Group,
                           covariate = NULL, 
                            patients = filt_colon_genus_metadata$Patient,
                           sim.method = "jaccard", p.adjust.m="BH")


# tidy the results
pp_factor <- pp_main

cols <- c("pairs","Df","SumsOfSqs", "F.Model","R2","p.value", "p.adjusted", "sig")
colnames(pp_factor) <- cols

# save raw results
supplements_beta[[paste("jaccard",level,segment)]] <- pp_factor
# see the results
knitr::kable(pp_factor,digits = 3,caption = "PERMANOVA, GROUP separation")
PERMANOVA, GROUP separation
pairs Df SumsOfSqs F.Model R2 p.value p.adjusted sig
pre_ltx vs healthy 1 0.752 2.816 0.020 0.067 0.067
pre_ltx vs post_ltx 1 0.830 2.894 0.011 0.022 0.033 *
post_ltx vs healthy 1 1.461 5.331 0.017 0.001 0.003 **

Plots

Custom

p <- pca_plot_custom(filt_colon_genus_tab,
                                 filt_colon_genus_taxa,
                                 filt_colon_genus_metadata,
                                 measure = "jaccard",
                                 show_boxplots = TRUE,
                                 variable = "Group", size=3, show_legend=TRUE)

# save the results
supplements_beta[[paste("PCoA jaccard",level,segment)]] <- p

# see the results
p

ASV level

level="ASV"
Aitchison

PERMANOVA

# preparing data frame
pairwise_df <- filt_colon_asv_tab %>% column_to_rownames("SeqID") %>% t()

# main effect
pp_main <- pairwise.adonis(pairwise_df,filt_colon_metadata$Group,
                           covariate = NULL, 
                            patients = filt_colon_metadata$Patient,
                           sim.method = "robust.aitchison", p.adjust.m="BH")


# tidy the results
pp_factor <- pp_main

cols <- c("pairs","Df","SumsOfSqs", "F.Model","R2","p.value", "p.adjusted", "sig")
colnames(pp_factor) <- cols

# save raw results
supplements_beta[[paste("aitchison",level,segment)]] <- pp_factor
# see the results
knitr::kable(pp_factor,digits = 3,caption = "PERMANOVA, GROUP separation")
PERMANOVA, GROUP separation
pairs Df SumsOfSqs F.Model R2 p.value p.adjusted sig
pre_ltx vs healthy 1 453.896 1.573 0.012 0.779 0.779
pre_ltx vs post_ltx 1 515.107 1.889 0.008 0.323 0.485
post_ltx vs healthy 1 1332.113 4.762 0.016 0.001 0.003 **

PCoA

p <- pca_plot_custom(filt_colon_asv_tab,
                           filt_colon_taxa_tab,
                           filt_colon_metadata,
                           show_boxplots = TRUE,
                           variable = "Group", size=3, show_legend=TRUE)

# save the results
supplements_beta[[paste("PCoA aitchison",level,segment)]] <- p

# see the results
p

Bray-Curtis

PERMANOVA

# preparing data frame
pairwise_df <- filt_colon_asv_tab %>% column_to_rownames("SeqID") %>% t()

# main effect
pp_main <- pairwise.adonis(pairwise_df,filt_colon_metadata$Group,
                           covariate = NULL, 
                           patients = filt_colon_metadata$Patient,
                           sim.method = "bray", p.adjust.m="BH")


# tidy the results
pp_factor <- pp_main

cols <- c("pairs","Df","SumsOfSqs", "F.Model","R2","p.value", "p.adjusted", "sig")
colnames(pp_factor) <- cols

# save raw results
supplements_beta[[paste("bray",level,segment)]] <- pp_factor
# see the results
knitr::kable(pp_factor,digits = 3,caption = "PERMANOVA, GROUP separation")
PERMANOVA, GROUP separation
pairs Df SumsOfSqs F.Model R2 p.value p.adjusted sig
pre_ltx vs healthy 1 0.703 2.227 0.016 0.247 0.247
pre_ltx vs post_ltx 1 0.940 2.914 0.012 0.022 0.033 *
post_ltx vs healthy 1 2.080 6.653 0.022 0.001 0.003 **

PCoA

p <- pca_plot_custom(filt_colon_asv_tab,
                     filt_colon_taxa_tab,
                     filt_colon_metadata,
                     measure = "bray",
                     show_boxplots = TRUE,
                     variable = "Group", size=3, show_legend=TRUE)

# save the results
supplements_beta[[paste("PCoA bray",level,segment)]] <- p

# see the results
p

Jaccard

PERMANOVA

# preparing data frame
pairwise_df <- filt_colon_asv_tab %>% column_to_rownames("SeqID") %>% t()

# main effect
pp_main <- pairwise.adonis(pairwise_df,filt_colon_metadata$Group,
                           covariate = NULL, 
                           patients = filt_colon_metadata$Patient,
                           sim.method = "jaccard", p.adjust.m="BH")


# tidy the results
pp_factor <- pp_main

cols <- c("pairs","Df","SumsOfSqs", "F.Model","R2","p.value", "p.adjusted", "sig")
colnames(pp_factor) <- cols

# save raw results
supplements_beta[[paste("jaccard",level,segment)]] <- pp_factor
# see the results
knitr::kable(pp_factor,digits = 3,caption = "PERMANOVA, GROUP separation")
PERMANOVA, GROUP separation
pairs Df SumsOfSqs F.Model R2 p.value p.adjusted sig
pre_ltx vs healthy 1 0.681 1.767 0.013 0.632 0.632
pre_ltx vs post_ltx 1 0.888 2.272 0.009 0.073 0.109
post_ltx vs healthy 1 1.733 4.508 0.015 0.001 0.003 **

PCoA

p <- pca_plot_custom(filt_colon_asv_tab,
                     filt_colon_taxa_tab,
                     filt_colon_metadata,
                     measure = "jaccard",
                     show_boxplots = TRUE,
                     variable = "Group", size=3, show_legend=TRUE)

# save the results
supplements_beta[[paste("PCoA jaccard",level,segment)]] <- p

# see the results
p

Saving results

write.xlsx(supplements_beta[!grepl("PCoA",names(supplements_beta))],
           file = file.path(path,
           paste0("supplements_beta_diversity_", segment,".xlsx")))

Univariate Analysis

Main - Genus level

level="genus"
# needed paths
path = "../results/Q1_czech/univariate_analysis"
path_maaslin=file.path("../intermediate_files/maaslin/Q1_czech/",level)
# variables
raw_linda_results_genus[[segment]] <- list()
linda_results_genus[[segment]] <- list()

# country and interaction problems
list_country_union <- list()
list_intersections <- list()
list_venns <- list()
uni_statistics <- list()

# workbook for final df
wb <- createWorkbook()

# PSC effect
psc_effect <- list()

Genus level

level="genus"

Aggregate taxa

genus_data <- aggregate_taxa(colon_asv_tab,
                             colon_taxa_tab,
                             taxonomic_level = level)

colon_genus_tab <- genus_data[[1]]
colon_genus_taxa_tab <- genus_data[[2]]

colon_genus_asv_taxa_tab <- create_asv_taxa_table(colon_genus_tab,
                                                  colon_genus_taxa_tab)
pre_ltx vs healthy
linDA
group <- c("healthy","pre_ltx")
comparison_name <- paste0(group[1], " vs ",group[2])
# prepare the data
linda_data <- binomial_prep(colon_genus_tab,
                            colon_genus_taxa_tab,
                            colon_metadata,
                            group, usage="linDA")
## Removing 148 ASV(s)
filt_colon_uni_data <- linda_data[[1]]
filt_colon_uni_taxa <- linda_data[[2]]
filt_colon_uni_metadata <- linda_data[[3]]

# fit the model
linda.obj <- linda(filt_colon_uni_data,
                   filt_colon_uni_metadata,
                   formula = '~ Group + (1|Patient)')
## 0  features are filtered!
## The filtered data has  137  samples and  191  features will be tested!
## Imputation approach is used.
## Fit linear mixed effects models ...
## Completed.
linda.output <- linda.obj$output
linda.output <- linda_renaming(linda.output, group)

# save the results
group1 <- paste0(group[1], " vs ","Group",group[2])


for (grp in c(group1)){
  raw_linda_results_genus[[segment]][[grp]] <- 
    rawlinda.df(linda.output,
                grp,
                filt_colon_uni_data,
                filt_colon_uni_taxa)
  
  linda_results_genus[[segment]][[grp]] <- 
    linda.df(linda.output,
             grp,
             filt_colon_uni_data,
             filt_colon_uni_taxa)
}
# volcano plot
volcano_1 <- volcano_plot_linda(linda.output, group1, 
                                taxa_table = filt_colon_uni_taxa) + 
            ggtitle(paste(group,collapse=" vs "))


volcano <- ggarrange(volcano_1)

# see the plot
volcano
## Warning: ggrepel: 2 unlabeled data points (too many overlaps). Consider increasing max.overlaps

MaAsLin2
volcano1 <- volcano_plot_maaslin(fit_data,filt_colon_uni_taxa) + 
            ggtitle(paste(group[1], "vs", group[2]))


volcano <- ggarrange(volcano1)
## Warning: Removed 18 rows containing missing values or values outside the scale range (`geom_text_repel()`).
volcano

Group - Intersection
intersection_results <- group_intersection(group, list_intersections, list_venns,
                                           linda.output, fit_data,
                                           raw_linda_results_genus, 
                                           segment = segment,
                                           level=level)

list_intersections <- intersection_results[[1]]
list_venns <- intersection_results[[2]]
venn <- intersection_results[[3]]

# show the results
venn

Basic statistics
uni_df <-  merge(basic_univariate_statistics(linda_data,group),
                 raw_linda_results_genus[[segment]][[group1]],
                 by="SeqID",all=TRUE)
## [1] "healthy"
## [1] "pre_ltx"
## [1] "healthy"
## [1] "pre_ltx"
uni_df[["final_sig"]] <- uni_df$SeqID %in% list_intersections[[paste(segment,level,comparison_name)]][["SeqID"]]
uni_statistics[[segment]][[paste(level,comparison_name)]] <- uni_df

# for comparison
new_name <- comparison_name
addWorksheet(wb, sheetName = new_name)
writeData(wb, sheet = new_name, uni_df, rowNames=FALSE)

pre_ltx vs post_ltx

group <- c("pre_ltx","post_ltx")
comparison_name <- paste0(group[1], " vs ",group[2])
linDA
# prepare the data
linda_data <- binomial_prep(colon_genus_tab,
                            colon_genus_taxa_tab,
                            colon_metadata,
                            group, usage="linDA")
## Removing 17 ASV(s)
filt_colon_uni_data <- linda_data[[1]]
filt_colon_uni_taxa <- linda_data[[2]]
filt_colon_uni_metadata <- linda_data[[3]]

# fit the model
linda.obj <- linda(filt_colon_uni_data,
                   filt_colon_uni_metadata,
                   formula = '~ Group + (1|Patient)')
## 0  features are filtered!
## The filtered data has  251  samples and  181  features will be tested!
## Imputation approach is used.
## Fit linear mixed effects models ...
## Completed.
linda.output <- linda.obj$output
linda.output <- linda_renaming(linda.output, group)

# save the results
group1 <- paste0(group[1], " vs ","Group",group[2])


for (grp in c(group1)){
  raw_linda_results_genus[[segment]][[grp]] <- 
    rawlinda.df(linda.output,
                grp,
                filt_colon_uni_data,
                filt_colon_uni_taxa)
  
  linda_results_genus[[segment]][[grp]] <- 
    linda.df(linda.output,
             grp,
             filt_colon_uni_data,
             filt_colon_uni_taxa)
}
# volcano plot
volcano_1 <- volcano_plot_linda(linda.output, group1, 
                                taxa_table = filt_colon_uni_taxa) + 
            ggtitle(paste(group,collapse=" vs "))

volcano <- ggarrange(volcano_1)

# see the plot
volcano

MaAsLin2
volcano1 <- volcano_plot_maaslin(fit_data,filt_colon_uni_taxa) + 
            ggtitle(paste(group[1], "vs", group[2]))


volcano <- ggarrange(volcano1)
## Warning: Removed 3 rows containing missing values or values outside the scale range (`geom_text_repel()`).
volcano

Group - Intersection
intersection_results <- group_intersection(group, 
                                           list_intersections, 
                                           list_venns,
                                           linda.output, fit_data,
                                           raw_linda_results_genus, 
                                           segment = segment,
                                           level=level)

list_intersections <- intersection_results[[1]]
list_venns <- intersection_results[[2]]
venn <- intersection_results[[3]]

# show the results
venn

Basic statistics
uni_df <-  merge(basic_univariate_statistics(linda_data,group),
                 raw_linda_results_genus[[segment]][[group1]],
                 by="SeqID",all=TRUE)
## [1] "pre_ltx"
## [1] "post_ltx"
## [1] "pre_ltx"
## [1] "post_ltx"
uni_df[["final_sig"]] <- uni_df$SeqID %in% list_intersections[[paste(segment,level,comparison_name)]][["SeqID"]]
uni_statistics[[segment]][[paste(level,comparison_name)]] <- uni_df

# for comparison
new_name <- comparison_name
addWorksheet(wb, sheetName = new_name)
writeData(wb, sheet = new_name, uni_df, rowNames=FALSE)
post_ltx vs healthy
group <- c("healthy","post_ltx")
comparison_name <- paste0(group[1], " vs ",group[2])
linDA
# prepare the data
linda_data <- binomial_prep(colon_genus_tab,
                            colon_genus_taxa_tab,
                            colon_metadata,
                            group, usage="linDA")
## Removing 5 ASV(s)
filt_colon_uni_data <- linda_data[[1]]
filt_colon_uni_taxa <- linda_data[[2]]
filt_colon_uni_metadata <- linda_data[[3]]

# fit the model
linda.obj <- linda(filt_colon_uni_data,
                   filt_colon_uni_metadata,
                   formula = '~ Group + (1|Patient)')
## 0  features are filtered!
## The filtered data has  304  samples and  179  features will be tested!
## Pseudo-count approach is used.
## Fit linear mixed effects models ...
## Completed.
linda.output <- linda.obj$output
linda.output <- linda_renaming(linda.output, group)

# save the results
group1 <- paste0(group[1], " vs ","Group",group[2])


for (grp in c(group1)){
  raw_linda_results_genus[[segment]][[grp]] <- 
    rawlinda.df(linda.output,
                grp,
                filt_colon_uni_data,
                filt_colon_uni_taxa)
  
  linda_results_genus[[segment]][[grp]] <- 
    linda.df(linda.output,
             grp,
             filt_colon_uni_data,
             filt_colon_uni_taxa)
}
# volcano plot
volcano_1 <- volcano_plot_linda(linda.output, group1, 
                                taxa_table = filt_colon_uni_taxa) + 
            ggtitle(paste(group,collapse=" vs "))

volcano <- ggarrange(volcano_1)

# see the plot
volcano
## Warning: ggrepel: 1 unlabeled data points (too many overlaps). Consider increasing max.overlaps

MaAsLin2
volcano1 <- volcano_plot_maaslin(fit_data,filt_colon_uni_taxa) + 
            ggtitle(paste(group[1], "vs", group[2]))

volcano <- ggarrange(volcano1)
## Warning: Removed 30 rows containing missing values or values outside the scale range (`geom_text_repel()`).
volcano

Group - Intersection
intersection_results <- group_intersection(group, list_intersections, list_venns,
                                           linda.output, fit_data,
                                           raw_linda_results_genus, 
                                           segment = segment,
                                           level=level)

list_intersections <- intersection_results[[1]]
list_venns <- intersection_results[[2]]
venn <- intersection_results[[3]]

# show the results
venn

Basic statistics
uni_df <-  merge(basic_univariate_statistics(linda_data,group),
                 raw_linda_results_genus[[segment]][[group1]],
                 by="SeqID",all=TRUE)
## [1] "healthy"
## [1] "post_ltx"
## [1] "healthy"
## [1] "post_ltx"
uni_df[["final_sig"]] <- uni_df$SeqID %in% list_intersections[[paste(segment,level,comparison_name)]][["SeqID"]]
uni_statistics[[segment]][[paste(level,comparison_name)]] <- uni_df

# for comparison
new_name <- comparison_name
addWorksheet(wb, sheetName = new_name)
writeData(wb, sheet = new_name, uni_df, rowNames=FALSE)
Visualization

Heatmap visualizing the linDA’s logFoldChange for taxa with p < 0.1.

list_heatmap <- list_intersections[grep(paste(segment,level),
                                  names(list_intersections),value=TRUE)]

p_heatmap_linda <- heatmap_linda(list_heatmap,colon_taxa_tab)
## Warning: The melt generic in data.table has been passed a data.frame and will attempt to redirect to the
## relevant reshape2 method; please note that reshape2 is superseded and is no longer actively developed, and
## this redirection is now deprecated. To continue using melt methods from reshape2 while both libraries are
## attached, e.g. melt.list, you can prepend the namespace, i.e. reshape2::melt(plot_df). In the next version,
## this warning will become an error.
## Warning: The melt generic in data.table has been passed a data.frame and will attempt to redirect to the
## relevant reshape2 method; please note that reshape2 is superseded and is no longer actively developed, and
## this redirection is now deprecated. To continue using melt methods from reshape2 while both libraries are
## attached, e.g. melt.list, you can prepend the namespace, i.e. reshape2::melt(plot_df). In the next version,
## this warning will become an error.
## Warning: The dcast generic in data.table has been passed a data.frame and will attempt to redirect to the
## relevant reshape2 method; please note that reshape2 is superseded and is no longer actively developed, and
## this redirection is now deprecated. Please do this redirection yourself like
## reshape2::dcast(plot_df_combined). In the next version, this warning will become an error.
p_heatmap_linda

Dot heatmap

dotheatmap_linda <- dot_heatmap_linda(list_heatmap,
                                      uni_df,
                                      colon_taxa_tab)
## Warning: The melt generic in data.table has been passed a data.frame and will attempt to redirect to the
## relevant reshape2 method; please note that reshape2 is superseded and is no longer actively developed, and
## this redirection is now deprecated. To continue using melt methods from reshape2 while both libraries are
## attached, e.g. melt.list, you can prepend the namespace, i.e. reshape2::melt(plot_df). In the next version,
## this warning will become an error.
## Warning: The melt generic in data.table has been passed a data.frame and will attempt to redirect to the
## relevant reshape2 method; please note that reshape2 is superseded and is no longer actively developed, and
## this redirection is now deprecated. To continue using melt methods from reshape2 while both libraries are
## attached, e.g. melt.list, you can prepend the namespace, i.e. reshape2::melt(plot_df). In the next version,
## this warning will become an error.
## Warning: The melt generic in data.table has been passed a data.frame and will attempt to redirect to the
## relevant reshape2 method; please note that reshape2 is superseded and is no longer actively developed, and
## this redirection is now deprecated. To continue using melt methods from reshape2 while both libraries are
## attached, e.g. melt.list, you can prepend the namespace, i.e. reshape2::melt(plot_df). In the next version,
## this warning will become an error.
## min_clr -1.88582 
## max_clr 7.041304 
## min_log -4.098288 
## max_log 4.831992
dotheatmap_linda

PSC effect

pre_LTx vs Healthy and Post_LTx vs Healthy intersection

imagename
imagename
A <- list_intersections[[paste(segment,level,"healthy vs pre_ltx")]]$SeqID
B <- list_intersections[[paste(segment,level,"healthy vs post_ltx")]]$SeqID
C <- intersect(A,B)

psc_effect[[paste(segment,level)]] <- list_intersections[[paste(segment,level, "healthy vs pre_ltx")]][list_intersections[[paste(segment,level, "healthy vs pre_ltx")]]$SeqID %in% C,]
  
# see the results
psc_effect[[paste(segment,level)]] 

Saving results

# ALL DATA
saveWorkbook(wb,file.path(path,paste0("uni_analysis_wb_",segment,".xlsx")),
             overwrite = TRUE)

# PSC effect
write.xlsx(psc_effect[[paste(segment,level)]],file.path(path,paste0("psc_effect_",segment,".xlsx")))

# SIGNIFICANT taxa

write.xlsx(list_intersections[grepl(segment,names(list_intersections))] %>%
            `names<-`(gsub(segment, "", names(
              list_intersections[grepl(segment,names(list_intersections))]))),
           file.path(path,paste0("significant_taxa_",segment,".xlsx")))

Supplementary Analysis

ASV level

level="ASV"
path_maaslin="../intermediate_files/maaslin/Q1_czech/ASV/"
raw_linda_results[[segment]] <- list()
linda_results[[segment]] <- list()
supplements_wb <- createWorkbook()
pre_ltx vs healthy
group <- c("healthy","pre_ltx")
comparison_name <- paste0(group[1], " vs ",group[2])

linDA

# prepare the data
linda_data <- binomial_prep(colon_asv_tab,
                            colon_taxa_tab,
                            colon_metadata,
                            group, usage="linDA")
## Removing 1808 ASV(s)
## Removing 13 ASV(s)
filt_colon_uni_data <- linda_data[[1]]
filt_colon_uni_taxa <- linda_data[[2]]
filt_colon_uni_metadata <- linda_data[[3]]

# fit the model
linda.obj <- linda(filt_colon_uni_data, 
                   filt_colon_uni_metadata, 
                   formula = '~ Group + (1|Patient)')
## 0  features are filtered!
## The filtered data has  137  samples and  518  features will be tested!
## Imputation approach is used.
## Fit linear mixed effects models ...
## Completed.
linda.output <- linda.obj$output
linda.output <- linda_renaming(linda.output, group)

group1 <- paste0(group[1], " vs ","Group",group[2])

for (grp in c(group1)){
  raw_linda_results[[segment]][[grp]] <- 
    rawlinda.df(linda.output,
                grp,
                filt_colon_uni_data,
                filt_colon_uni_taxa)
  
  linda_results[[segment]][[grp]] <- 
    linda.df(linda.output,
             grp,
             filt_colon_uni_data,
             filt_colon_uni_taxa)
}
# volcano plot
volcano_1 <- volcano_plot_linda(linda.output, group1,
                                taxa_table = filt_colon_uni_taxa) +
              ggtitle(paste(group,collapse=" vs "))


volcano <- ggarrange(volcano_1)

# see the plot
volcano

MaAsLin2

Volcano plot

volcano1 <- volcano_plot_maaslin(fit_data,filt_colon_uni_taxa) + 
            ggtitle(paste(group[1], "vs", group[2]))

volcano <- ggarrange(volcano1)

# see the results
volcano
## Warning: ggrepel: 11 unlabeled data points (too many overlaps). Consider increasing max.overlaps

Group - Intersection

intersection_results <- group_intersection(group, list_intersections, list_venns,
                                           linda.output, fit_data,
                                           raw_linda_results, 
                                                                                      segment = segment,
                                           level=level)

list_intersections <- intersection_results[[1]]
list_venns <- intersection_results[[2]]
venn <- intersection_results[[3]]

# show the results
venn

Basic statistics

uni_df <-  merge(basic_univariate_statistics(linda_data,group),
                 raw_linda_results[[segment]][[group1]],
                 by="SeqID",all=TRUE)
## [1] "healthy"
## [1] "pre_ltx"
## [1] "healthy"
## [1] "pre_ltx"
uni_df[["final_sig"]] <- uni_df$SeqID %in% list_intersections[[paste(segment,level,comparison_name)]][["SeqID"]]
uni_statistics[[segment]][[paste(level,comparison_name)]] <- uni_df

# for comparison
new_name <- paste(level,comparison_name)
addWorksheet(supplements_wb, sheetName = new_name)
writeData(supplements_wb, sheet = new_name, uni_df, rowNames=FALSE)
pre_ltx vs post_ltx
group <- c("pre_ltx","post_ltx")
comparison_name <- paste0(group[1], " vs ",group[2])

linDA

# prepare the data
linda_data <- binomial_prep(colon_asv_tab,
                            colon_taxa_tab,
                            colon_metadata,
                            group, usage="linDA")
## Removing 743 ASV(s)
## Removing 13 ASV(s)
filt_colon_uni_data <- linda_data[[1]]
filt_colon_uni_taxa <- linda_data[[2]]
filt_colon_uni_metadata <- linda_data[[3]]

# fit the model
linda.obj <- linda(filt_colon_uni_data, 
                   filt_colon_uni_metadata, 
                   formula = '~ Group + (1|Patient)')
## 0  features are filtered!
## The filtered data has  250  samples and  419  features will be tested!
## Imputation approach is used.
## Fit linear mixed effects models ...
## Completed.
linda.output <- linda.obj$output
linda.output <- linda_renaming(linda.output, group)

group1 <- paste0(group[1], " vs ","Group",group[2])

for (grp in c(group1)){
  raw_linda_results[[segment]][[grp]] <- 
    rawlinda.df(linda.output,
                grp,
                filt_colon_uni_data,
                filt_colon_uni_taxa)
  
  linda_results[[segment]][[grp]] <- 
    linda.df(linda.output,
             grp,
             filt_colon_uni_data,
             filt_colon_uni_taxa)
}
# volcano plot
volcano_1 <- volcano_plot_linda(linda.output, group1,
                                taxa_table = filt_colon_uni_taxa) +
              ggtitle(paste(group,collapse=" vs "))

volcano <- ggarrange(volcano_1)

# see the plot
volcano

MaAsLin2

volcano1 <- volcano_plot_maaslin(fit_data,filt_colon_uni_taxa) + 
            ggtitle(paste(group[1], "vs", group[2]))

volcano <- ggarrange(volcano1)

# see the results
volcano

Group - Intersection

intersection_results <- group_intersection(group, list_intersections, list_venns,
                                           linda.output, fit_data,
                                           raw_linda_results, 
                                           segment=segment,
                                           level=level)

list_intersections <- intersection_results[[1]]
list_venns <- intersection_results[[2]]
venn <- intersection_results[[3]]

# show the results
venn

Basic statistics

uni_df <-  merge(basic_univariate_statistics(linda_data,group),
                 raw_linda_results[[segment]][[group1]],
                 by="SeqID",all=TRUE)
## [1] "pre_ltx"
## [1] "post_ltx"
## [1] "pre_ltx"
## [1] "post_ltx"
uni_df[["final_sig"]] <- uni_df$SeqID %in% list_intersections[[paste(segment,level,comparison_name)]][["SeqID"]]
uni_statistics[[segment]][[paste(level,comparison_name)]] <- uni_df

# for comparison
new_name <- paste(level,comparison_name)
addWorksheet(supplements_wb, sheetName = new_name)
writeData(supplements_wb, sheet = new_name, uni_df, rowNames=FALSE)
post_ltx vs healthy
group <- c("healthy","post_ltx")
comparison_name <- paste0(group[1], " vs ",group[2])

linDA

# prepare the data
linda_data <- binomial_prep(colon_asv_tab,
                            colon_taxa_tab,
                            colon_metadata,
                            group, usage="linDA")
## Removing 232 ASV(s)
## Removing 4 ASV(s)
filt_colon_uni_data <- linda_data[[1]]
filt_colon_uni_taxa <- linda_data[[2]]
filt_colon_uni_metadata <- linda_data[[3]]

# fit the model
linda.obj <- linda(filt_colon_uni_data, 
                   filt_colon_uni_metadata, 
                   formula = '~ Group + (1|Patient)')
## 0  features are filtered!
## The filtered data has  303  samples and  440  features will be tested!
## Pseudo-count approach is used.
## Fit linear mixed effects models ...
## Completed.
linda.output <- linda.obj$output
linda.output <- linda_renaming(linda.output, group)

group1 <- paste0(group[1], " vs ","Group",group[2])

for (grp in c(group1)){
  raw_linda_results[[segment]][[grp]] <- 
    rawlinda.df(linda.output,
                grp,
                filt_colon_uni_data,
                filt_colon_uni_taxa)
  
  linda_results[[segment]][[grp]] <- 
    linda.df(linda.output,
             grp,
             filt_colon_uni_data,
             filt_colon_uni_taxa)
}
# volcano plot
volcano_1 <- volcano_plot_linda(linda.output, group1,
                                taxa_table = filt_colon_uni_taxa) +
              ggtitle(paste(group,collapse=" vs "))


volcano <- ggarrange(volcano_1)

# see the plot
volcano
## Warning: ggrepel: 22 unlabeled data points (too many overlaps). Consider increasing max.overlaps

MaAsLin2

volcano1 <- volcano_plot_maaslin(fit_data,filt_colon_uni_taxa) + 
            ggtitle(paste(group[1], "vs", group[2]))

volcano <- ggarrange(volcano1)
volcano
## Warning: ggrepel: 94 unlabeled data points (too many overlaps). Consider increasing max.overlaps

Group - Intersection

intersection_results <- group_intersection(group, list_intersections, list_venns,
                                           linda.output, fit_data,
                                           raw_linda_results, 
                                           segment = segment,
                                           level=level)

list_intersections <- intersection_results[[1]]
list_venns <- intersection_results[[2]]
venn <- intersection_results[[3]]

# show the results
venn

Basic statistics

uni_df <-  merge(basic_univariate_statistics(linda_data,group),
                 raw_linda_results[[segment]][[group1]],
                 by="SeqID",all=TRUE)
## [1] "healthy"
## [1] "post_ltx"
## [1] "healthy"
## [1] "post_ltx"
uni_df[["final_sig"]] <- uni_df$SeqID %in% list_intersections[[paste(segment,level,comparison_name)]][["SeqID"]]
uni_statistics[[segment]][[paste(level,comparison_name)]] <- uni_df

# for comparison
new_name <- paste(level,comparison_name)
addWorksheet(supplements_wb, sheetName = new_name)
writeData(supplements_wb, sheet = new_name, uni_df, rowNames=FALSE)
Visualization

Heatmap visualizing the linDA’s logFoldChange for taxa with p < 0.1.

list_heatmap <- list_intersections[grep(paste(segment,level),
                                  names(list_intersections),value=TRUE)]

p_heatmap_linda <- heatmap_linda(list_heatmap,colon_taxa_tab)
## Warning: The melt generic in data.table has been passed a data.frame and will attempt to redirect to the
## relevant reshape2 method; please note that reshape2 is superseded and is no longer actively developed, and
## this redirection is now deprecated. To continue using melt methods from reshape2 while both libraries are
## attached, e.g. melt.list, you can prepend the namespace, i.e. reshape2::melt(plot_df). In the next version,
## this warning will become an error.
## Warning: The melt generic in data.table has been passed a data.frame and will attempt to redirect to the
## relevant reshape2 method; please note that reshape2 is superseded and is no longer actively developed, and
## this redirection is now deprecated. To continue using melt methods from reshape2 while both libraries are
## attached, e.g. melt.list, you can prepend the namespace, i.e. reshape2::melt(plot_df). In the next version,
## this warning will become an error.
## Warning: The dcast generic in data.table has been passed a data.frame and will attempt to redirect to the
## relevant reshape2 method; please note that reshape2 is superseded and is no longer actively developed, and
## this redirection is now deprecated. Please do this redirection yourself like
## reshape2::dcast(plot_df_combined). In the next version, this warning will become an error.
p_heatmap_linda

Dot heatmap

dotheatmap_linda <- dot_heatmap_linda(list_heatmap,uni_df,colon_taxa_tab)
## Warning: The melt generic in data.table has been passed a data.frame and will attempt to redirect to the
## relevant reshape2 method; please note that reshape2 is superseded and is no longer actively developed, and
## this redirection is now deprecated. To continue using melt methods from reshape2 while both libraries are
## attached, e.g. melt.list, you can prepend the namespace, i.e. reshape2::melt(plot_df). In the next version,
## this warning will become an error.
## Warning: The melt generic in data.table has been passed a data.frame and will attempt to redirect to the
## relevant reshape2 method; please note that reshape2 is superseded and is no longer actively developed, and
## this redirection is now deprecated. To continue using melt methods from reshape2 while both libraries are
## attached, e.g. melt.list, you can prepend the namespace, i.e. reshape2::melt(plot_df). In the next version,
## this warning will become an error.
## Warning: The melt generic in data.table has been passed a data.frame and will attempt to redirect to the
## relevant reshape2 method; please note that reshape2 is superseded and is no longer actively developed, and
## this redirection is now deprecated. To continue using melt methods from reshape2 while both libraries are
## attached, e.g. melt.list, you can prepend the namespace, i.e. reshape2::melt(plot_df). In the next version,
## this warning will become an error.
## min_clr -1.032666 
## max_clr 6.145786 
## min_log -4.652855 
## max_log 4.192808
dotheatmap_linda

PSC effect

pre_LTx vs Healthy and Post_LTx vs Healthy intersection

imagename
imagename
A <- list_intersections[[paste(segment,level,"healthy vs pre_ltx")]]$SeqID
B <- list_intersections[[paste(segment,level,"healthy vs post_ltx")]]$SeqID
C <- intersect(A,B)

psc_effect[[paste(segment,level)]] <- list_intersections[[paste(segment,level, "healthy vs pre_ltx")]][list_intersections[[paste(segment,level, "healthy vs pre_ltx")]]$SeqID %in% C,]
  
# see the results
psc_effect[[paste(segment,level)]] 

Phylum level

level="phylum"
path_maaslin="../intermediate_files/maaslin/Q1_czech/Phylum/"
raw_linda_results_phylum[[segment]] <- list()
linda_results_phylum[[segment]] <- list()

Aggregate taxa

phylum_data <- aggregate_taxa(colon_asv_tab,
                             colon_taxa_tab,
                             taxonomic_level = "Phylum")

colon_phylum_tab <- phylum_data[[1]]
colon_phylum_taxa_tab <- phylum_data[[2]]
pre_ltx vs healthy
group <- c("healthy","pre_ltx")
comparison_name <- paste0(group[1], " vs ",group[2])

linDA

# prepare the data
linda_data <- binomial_prep(colon_phylum_tab,
                            colon_phylum_taxa_tab,
                            colon_metadata,
                            group, usage="linDA")
## Removing 7 ASV(s)
filt_colon_uni_data <- linda_data[[1]]
filt_colon_uni_taxa <- linda_data[[2]]
filt_colon_uni_metadata <- linda_data[[3]]

# fit the model
linda.obj <- linda(filt_colon_uni_data, 
                   filt_colon_uni_metadata, 
                   formula = '~ Group + (1|Patient)')
## 0  features are filtered!
## The filtered data has  137  samples and  10  features will be tested!
## Imputation approach is used.
## Fit linear mixed effects models ...
## Completed.
linda.output <- linda.obj$output
linda.output <- linda_renaming(linda.output, group)

group1 <- paste0(group[1], " vs ","Group",group[2])

for (grp in c(group1)){
  raw_linda_results[[segment]][[grp]] <- 
    rawlinda.df(linda.output,
                grp,
                filt_colon_uni_data,
                filt_colon_uni_taxa)
  
  linda_results[[segment]][[grp]] <- 
    linda.df(linda.output,
             grp,
             filt_colon_uni_data,
             filt_colon_uni_taxa)
}
# volcano plot
volcano_1 <- volcano_plot_linda(linda.output, group1,
                                taxa_table = filt_colon_uni_taxa) +
              ggtitle(paste(group,collapse=" vs "))
## Using Phylum for naming
volcano <- ggarrange(volcano_1)

# see the plot
volcano

MaAsLin2

Volcano plot

volcano1 <- volcano_plot_maaslin(fit_data,filt_colon_uni_taxa) + 
            ggtitle(paste(group[1], "vs", group[2]))

volcano <- ggarrange(volcano1)

# see the results
volcano

Group - Intersection

intersection_results <- group_intersection(group, list_intersections, list_venns,
                                           linda.output, fit_data,
                                           raw_linda_results, 
                                                                                      segment = segment,
                                           level=level)

list_intersections <- intersection_results[[1]]
list_venns <- intersection_results[[2]]
venn <- intersection_results[[3]]

# show the results
venn

Basic statistics

uni_df <-  merge(basic_univariate_statistics(linda_data,group),
                 raw_linda_results[[segment]][[group1]],
                 by="SeqID",all=TRUE)
## [1] "healthy"
## [1] "pre_ltx"
## [1] "healthy"
## [1] "pre_ltx"
uni_df[["final_sig"]] <- uni_df$SeqID %in% list_intersections[[paste(segment,level,comparison_name)]][["SeqID"]]
uni_statistics[[segment]][[paste(level,comparison_name)]] <- uni_df

# for comparison
new_name <- paste(level,comparison_name)
addWorksheet(supplements_wb, sheetName = new_name)
writeData(supplements_wb, sheet = new_name, uni_df, rowNames=FALSE)
pre_ltx vs post_ltx
group <- c("pre_ltx","post_ltx")
comparison_name <- paste0(group[1], " vs ",group[2])

linDA

# prepare the data
linda_data <- binomial_prep(colon_phylum_tab,
                            colon_phylum_taxa_tab,
                            colon_metadata,
                            group, usage="linDA")

filt_colon_uni_data <- linda_data[[1]]
filt_colon_uni_taxa <- linda_data[[2]]
filt_colon_uni_metadata <- linda_data[[3]]

# fit the model
linda.obj <- linda(filt_colon_uni_data, 
                   filt_colon_uni_metadata, 
                   formula = '~ Group + (1|Patient)')
## 0  features are filtered!
## The filtered data has  251  samples and  10  features will be tested!
## Imputation approach is used.
## Fit linear mixed effects models ...
## Completed.
linda.output <- linda.obj$output
linda.output <- linda_renaming(linda.output, group)

group1 <- paste0(group[1], " vs ","Group",group[2])

for (grp in c(group1)){
  raw_linda_results[[segment]][[grp]] <- 
    rawlinda.df(linda.output,
                grp,
                filt_colon_uni_data,
                filt_colon_uni_taxa)
  
  linda_results[[segment]][[grp]] <- 
    linda.df(linda.output,
             grp,
             filt_colon_uni_data,
             filt_colon_uni_taxa)
}
# volcano plot
volcano_1 <- volcano_plot_linda(linda.output, group1,
                                taxa_table = filt_colon_uni_taxa) +
              ggtitle(paste(group,collapse=" vs "))
## Using Phylum for naming
volcano <- ggarrange(volcano_1)

# see the plot
volcano

MaAsLin2

volcano1 <- volcano_plot_maaslin(fit_data,filt_colon_uni_taxa) + 
            ggtitle(paste(group[1], "vs", group[2]))

volcano <- ggarrange(volcano1)

# see the results
volcano

Group - Intersection

intersection_results <- group_intersection(group, list_intersections, list_venns,
                                           linda.output, fit_data,
                                           raw_linda_results, 
                                           segment=segment,
                                           level=level)

list_intersections <- intersection_results[[1]]
list_venns <- intersection_results[[2]]
venn <- intersection_results[[3]]

# show the results
venn

Basic statistics

uni_df <-  merge(basic_univariate_statistics(linda_data,group),
                 raw_linda_results[[segment]][[group1]],
                 by="SeqID",all=TRUE)
## [1] "pre_ltx"
## [1] "post_ltx"
## [1] "pre_ltx"
## [1] "post_ltx"
uni_df[["final_sig"]] <- uni_df$SeqID %in% list_intersections[[paste(segment,level,comparison_name)]][["SeqID"]]
uni_statistics[[segment]][[paste(level,comparison_name)]] <- uni_df

# for comparison
new_name <- paste(level,comparison_name)
addWorksheet(supplements_wb, sheetName = new_name)
writeData(supplements_wb, sheet = new_name, uni_df, rowNames=FALSE)
post_ltx vs healthy
group <- c("healthy","post_ltx")
comparison_name <- paste0(group[1], " vs ",group[2])

linDA

# prepare the data
linda_data <- binomial_prep(colon_phylum_tab,
                            colon_phylum_taxa_tab,
                            colon_metadata,
                            group, usage="linDA")

filt_colon_uni_data <- linda_data[[1]]
filt_colon_uni_taxa <- linda_data[[2]]
filt_colon_uni_metadata <- linda_data[[3]]

# fit the model
linda.obj <- linda(filt_colon_uni_data, 
                   filt_colon_uni_metadata, 
                   formula = '~ Group + (1|Patient)')
## 0  features are filtered!
## The filtered data has  304  samples and  10  features will be tested!
## Imputation approach is used.
## Fit linear mixed effects models ...
## Completed.
linda.output <- linda.obj$output
linda.output <- linda_renaming(linda.output, group)

group1 <- paste0(group[1], " vs ","Group",group[2])

for (grp in c(group1)){
  raw_linda_results[[segment]][[grp]] <- 
    rawlinda.df(linda.output,
                grp,
                filt_colon_uni_data,
                filt_colon_uni_taxa)
  
  linda_results[[segment]][[grp]] <- 
    linda.df(linda.output,
             grp,
             filt_colon_uni_data,
             filt_colon_uni_taxa)
}
# volcano plot
volcano_1 <- volcano_plot_linda(linda.output, group1,
                                taxa_table = filt_colon_uni_taxa) +
              ggtitle(paste(group,collapse=" vs "))
## Using Phylum for naming
volcano <- ggarrange(volcano_1)

# see the plot
volcano

MaAsLin2

volcano1 <- volcano_plot_maaslin(fit_data,filt_colon_uni_taxa) + 
            ggtitle(paste(group[1], "vs", group[2]))


volcano <- ggarrange(volcano1)
volcano

Group - Intersection

intersection_results <- group_intersection(group, list_intersections, list_venns,
                                           linda.output, fit_data,
                                           raw_linda_results, 
                                           segment = segment,
                                           level=level)

list_intersections <- intersection_results[[1]]
list_venns <- intersection_results[[2]]
venn <- intersection_results[[3]]

# show the results
venn

Basic statistics

uni_df <-  merge(basic_univariate_statistics(linda_data,group),
                 raw_linda_results[[segment]][[group1]],
                 by="SeqID",all=TRUE)
## [1] "healthy"
## [1] "post_ltx"
## [1] "healthy"
## [1] "post_ltx"
uni_df[["final_sig"]] <- uni_df$SeqID %in% list_intersections[[paste(segment,level,comparison_name)]][["SeqID"]]
uni_statistics[[segment]][[paste(level,comparison_name)]] <- uni_df

# for comparison
new_name <- paste(level,comparison_name)
addWorksheet(supplements_wb, sheetName = new_name)
writeData(supplements_wb, sheet = new_name, uni_df, rowNames=FALSE)
Visualization

Heatmap visualizing the linDA’s logFoldChange for taxa with p < 0.1.

list_heatmap <- list_intersections[grep(paste(segment,level),
                                  names(list_intersections),value=TRUE)]

p_heatmap_linda <- heatmap_linda(list_heatmap,colon_taxa_tab)
## Warning: The melt generic in data.table has been passed a data.frame and will attempt to redirect to the
## relevant reshape2 method; please note that reshape2 is superseded and is no longer actively developed, and
## this redirection is now deprecated. To continue using melt methods from reshape2 while both libraries are
## attached, e.g. melt.list, you can prepend the namespace, i.e. reshape2::melt(plot_df). In the next version,
## this warning will become an error.
## Warning: The melt generic in data.table has been passed a data.frame and will attempt to redirect to the
## relevant reshape2 method; please note that reshape2 is superseded and is no longer actively developed, and
## this redirection is now deprecated. To continue using melt methods from reshape2 while both libraries are
## attached, e.g. melt.list, you can prepend the namespace, i.e. reshape2::melt(plot_df). In the next version,
## this warning will become an error.
## Warning: The dcast generic in data.table has been passed a data.frame and will attempt to redirect to the
## relevant reshape2 method; please note that reshape2 is superseded and is no longer actively developed, and
## this redirection is now deprecated. Please do this redirection yourself like
## reshape2::dcast(plot_df_combined). In the next version, this warning will become an error.
p_heatmap_linda

Dot heatmap

dotheatmap_linda <- dot_heatmap_linda(list_heatmap,uni_df,colon_taxa_tab)
## Warning: The melt generic in data.table has been passed a data.frame and will attempt to redirect to the
## relevant reshape2 method; please note that reshape2 is superseded and is no longer actively developed, and
## this redirection is now deprecated. To continue using melt methods from reshape2 while both libraries are
## attached, e.g. melt.list, you can prepend the namespace, i.e. reshape2::melt(plot_df). In the next version,
## this warning will become an error.
## Warning: The melt generic in data.table has been passed a data.frame and will attempt to redirect to the
## relevant reshape2 method; please note that reshape2 is superseded and is no longer actively developed, and
## this redirection is now deprecated. To continue using melt methods from reshape2 while both libraries are
## attached, e.g. melt.list, you can prepend the namespace, i.e. reshape2::melt(plot_df). In the next version,
## this warning will become an error.
## Warning: The melt generic in data.table has been passed a data.frame and will attempt to redirect to the
## relevant reshape2 method; please note that reshape2 is superseded and is no longer actively developed, and
## this redirection is now deprecated. To continue using melt methods from reshape2 while both libraries are
## attached, e.g. melt.list, you can prepend the namespace, i.e. reshape2::melt(plot_df). In the next version,
## this warning will become an error.
## min_clr -3.538652 
## max_clr 4.750646 
## min_log -3.526691 
## max_log 2.048554
dotheatmap_linda

PSC effect

pre_LTx vs Healthy and Post_LTx vs Healthy intersection

imagename
imagename
A <- list_intersections[[paste(segment,level,"healthy vs pre_ltx")]]$SeqID
B <- list_intersections[[paste(segment,level,"healthy vs post_ltx")]]$SeqID
C <- intersect(A,B)

psc_effect[[paste(segment,level)]] <- list_intersections[[paste(segment,level, "healthy vs pre_ltx")]][list_intersections[[paste(segment,level, "healthy vs pre_ltx")]]$SeqID %in% C,]
  
# see the results
psc_effect[[paste(segment,level)]] 

Saving results

# ALL DATA
saveWorkbook(supplements_wb,file.path(path,paste0("supplements_uni_analysis_wb_",segment,".xlsx")),overwrite = TRUE)

# PSC effect
write.xlsx(psc_effect,
          file.path(path,paste0("supplements_psc_effect_",segment,".xlsx")))

# SIGNIFICANT taxa
write.xlsx(list_intersections[grepl(segment,names(list_intersections))] %>%
            `names<-`(gsub(segment, "", names(
              list_intersections[grepl(segment,names(list_intersections))]))),
           file.path(path,paste0("supplements_significant_taxa_",segment,".xlsx")))

Machine learning

path = "../results/Q1_czech/models"

ElasticNet

model="enet"

ASV level

level="ASV"
pre_ltx vs healthy
group <- c("pre_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_colon_uni_data <- binomial_prep(colon_asv_tab,
                                     colon_taxa_tab,
                                     colon_metadata,
                                     group, 
                                     usage="ml_clr",
                                     patient = TRUE)
## Removing 1808 ASV(s)
## Removing 13 ASV(s)
# fit the model
enet_model <- glmnet_binomial(filt_colon_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                              clust_var="Patient",
                              reuse=TRUE,
                              file=model_name,
                              Q="Q1_czech")

# ROC
roc_c <- roc_curve(enet_model, group)
## Warning in geom_line(aes(x = `1-specificity`, y = sensitivity, by = name, : Ignoring unknown aesthetics: by
models_summ[[model_name]] <- enet_model$model_summary
models_cm[[model_name]] <- enet_model$conf_matrices$original
roc_cs[[model_name]] <- enet_model$kfold_rocobjs
betas[[model_name]] <- as.matrix(enet_model$betas)

# see the results
enet_model$model_summary %>% t()
##                                       [,1]
## alpha                           0.20000000
## lambda                          0.07726996
## auc                             1.00000000
## auc_czech                       1.00000000
## auc_no                                 NaN
## auc_optimism_corrected          0.92487601
## auc_optimism_corrected_CIL      0.83593690
## auc_optimism_corrected_CIU      0.97978125
## accuracy                        1.00000000
## accuracy_czech                         NaN
## accuracy_no                            NaN
## accuracy_optimism_corrected     0.87779018
## accuracy_optimism_corrected_CIL 0.79942979
## accuracy_optimism_corrected_CIU 0.95779630
enet_model$conf_matrices
## $original
##     Predicted
## True  0  1
##    0 95  0
##    1  0 42
## 
## $czech
##     Predicted
## True  0  1
##    0 95  0
##    1  0 42
## 
## $no
## [1] NaN
enet_model$plot
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

roc_c

pre_ltx vs post_ltx
group <- c("pre_ltx","post_ltx")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_colon_uni_data <- binomial_prep(colon_asv_tab,
                                     colon_taxa_tab,
                                     colon_metadata,
                                     group, usage="ml_clr",
                                     patient = TRUE)
## Removing 743 ASV(s)
## Removing 13 ASV(s)
# fit the model
enet_model <- glmnet_binomial(filt_colon_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", N=10,
                              clust_var="Patient",
                              reuse=TRUE,
                              file=model_name,
                              Q="Q1_czech")

# ROC
roc_c <- roc_curve(enet_model, group)
## Warning in geom_line(aes(x = `1-specificity`, y = sensitivity, by = name, : Ignoring unknown aesthetics: by
# save the results
models_summ[[model_name]] <- enet_model$model_summary
models_cm[[model_name]] <- enet_model$conf_matrices$original
roc_cs[[model_name]] <- enet_model$kfold_rocobjs
betas[[model_name]] <- as.matrix(enet_model$betas)

# see the results
enet_model$model_summary %>% t()
##                                        [,1]
## alpha                           1.000000000
## lambda                          0.007559533
## auc                             1.000000000
## auc_czech                       1.000000000
## auc_no                                  NaN
## auc_optimism_corrected          0.845603174
## auc_optimism_corrected_CIL      0.732635525
## auc_optimism_corrected_CIU      0.914518784
## accuracy                        1.000000000
## accuracy_czech                          NaN
## accuracy_no                             NaN
## accuracy_optimism_corrected     0.840254112
## accuracy_optimism_corrected_CIL 0.781520563
## accuracy_optimism_corrected_CIU 0.897722491
enet_model$conf_matrices
## $original
##     Predicted
## True   0   1
##    0 208   0
##    1   0  42
## 
## $czech
##     Predicted
## True   0   1
##    0 208   0
##    1   0  42
## 
## $no
## [1] NaN
enet_model$plot
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

roc_c

post_ltx vs healthy
group <- c("post_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_colon_uni_data <- binomial_prep(colon_asv_tab,
                                     colon_taxa_tab,
                                     colon_metadata,
                                     group, 
                                     usage="ml_clr",
                                     patient = TRUE)
## Removing 232 ASV(s)
## Removing 4 ASV(s)
# fit the model
enet_model <- glmnet_binomial(filt_colon_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                              clust_var="Patient",
                              reuse=TRUE,
                              file=model_name,
                              Q="Q1_czech")

# ROC
roc_c <- roc_curve(enet_model, group)
## Warning in geom_line(aes(x = `1-specificity`, y = sensitivity, by = name, : Ignoring unknown aesthetics: by
# save the results
models_summ[[model_name]] <- enet_model$model_summary
models_cm[[model_name]] <- enet_model$conf_matrices$original
roc_cs[[model_name]] <- enet_model$kfold_rocobjs
betas[[model_name]] <- as.matrix(enet_model$betas)

# see the results
enet_model$model_summary %>% t()
##                                        [,1]
## alpha                           0.400000000
## lambda                          0.008015337
## auc                             1.000000000
## auc_czech                       1.000000000
## auc_no                                  NaN
## auc_optimism_corrected          0.976476725
## auc_optimism_corrected_CIL      0.958769096
## auc_optimism_corrected_CIU      0.990150128
## accuracy                        1.000000000
## accuracy_czech                          NaN
## accuracy_no                             NaN
## accuracy_optimism_corrected     0.921683460
## accuracy_optimism_corrected_CIL 0.866156635
## accuracy_optimism_corrected_CIU 0.969726503
enet_model$conf_matrices
## $original
##     Predicted
## True   0   1
##    0  95   0
##    1   0 208
## 
## $czech
##     Predicted
## True   0   1
##    0  95   0
##    1   0 208
## 
## $no
## [1] NaN
enet_model$plot
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

roc_c

Genus level

level="genus"

Aggregate taxa

genus_data <- aggregate_taxa(colon_asv_tab,
                             colon_taxa_tab,
                             taxonomic_level = level)

colon_genus_tab <- genus_data[[1]]
colon_genus_taxa_tab <- genus_data[[2]]
pre_ltx vs healthy
group <- c("pre_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_colon_uni_data <- binomial_prep(colon_genus_tab,
                                     colon_genus_taxa_tab,
                                     colon_metadata,
                                     group, 
                                     usage="ml_clr",
                                     patient = TRUE)
## Removing 148 ASV(s)
# fit the model
enet_model <- glmnet_binomial(filt_colon_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                              clust_var="Patient",
                              reuse=TRUE,
                              file=model_name,
                              Q="Q1_czech")

# ROC
roc_c <- roc_curve(enet_model, group)
## Warning in geom_line(aes(x = `1-specificity`, y = sensitivity, by = name, : Ignoring unknown aesthetics: by
# save the results
models_summ[[model_name]] <- enet_model$model_summary
models_cm[[model_name]] <- enet_model$conf_matrices$original
roc_cs[[model_name]] <- enet_model$kfold_rocobjs
betas[[model_name]] <- as.matrix(enet_model$betas)

# see the results
enet_model$model_summary %>% t()
##                                       [,1]
## alpha                           1.00000000
## lambda                          0.01369607
## auc                             1.00000000
## auc_czech                       1.00000000
## auc_no                                 NaN
## auc_optimism_corrected          0.94053612
## auc_optimism_corrected_CIL      0.90046278
## auc_optimism_corrected_CIU      0.98930682
## accuracy                        1.00000000
## accuracy_czech                         NaN
## accuracy_no                            NaN
## accuracy_optimism_corrected     0.84937424
## accuracy_optimism_corrected_CIL 0.76972516
## accuracy_optimism_corrected_CIU 0.93963675
enet_model$conf_matrices
## $original
##     Predicted
## True  0  1
##    0 95  0
##    1  0 42
## 
## $czech
##     Predicted
## True  0  1
##    0 95  0
##    1  0 42
## 
## $no
## [1] NaN
enet_model$plot
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

roc_c

pre_ltx vs post_ltx
group <- c("pre_ltx","post_ltx")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_colon_uni_data <- binomial_prep(colon_genus_tab,
                                     colon_genus_taxa_tab,
                                     colon_metadata,
                                     group, 
                                     usage="ml_clr",
                                     patient = TRUE)
## Removing 17 ASV(s)
# fit the model
enet_model <- glmnet_binomial(filt_colon_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                              clust_var="Patient",
                              reuse=TRUE,
                              file=model_name,
                              Q="Q1_czech")

# ROC
roc_c <- roc_curve(enet_model, group)
## Warning in geom_line(aes(x = `1-specificity`, y = sensitivity, by = name, : Ignoring unknown aesthetics: by
# save the results
models_summ[[model_name]] <- enet_model$model_summary
models_cm[[model_name]] <- enet_model$conf_matrices$original
roc_cs[[model_name]] <- enet_model$kfold_rocobjs
betas[[model_name]] <- as.matrix(enet_model$betas)

# see the results
enet_model$model_summary %>% t()
##                                       [,1]
## alpha                           0.00000000
## lambda                          0.04874797
## auc                             1.00000000
## auc_czech                       1.00000000
## auc_no                                 NaN
## auc_optimism_corrected          0.75472529
## auc_optimism_corrected_CIL      0.54235491
## auc_optimism_corrected_CIU      0.83904574
## accuracy                        1.00000000
## accuracy_czech                         NaN
## accuracy_no                            NaN
## accuracy_optimism_corrected     0.81716332
## accuracy_optimism_corrected_CIL 0.76429070
## accuracy_optimism_corrected_CIU 0.85651042
enet_model$conf_matrices
## $original
##     Predicted
## True   0   1
##    0 209   0
##    1   0  42
## 
## $czech
##     Predicted
## True   0   1
##    0 209   0
##    1   0  42
## 
## $no
## [1] NaN
enet_model$plot
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

roc_c

post_ltx vs healthy
group <- c("post_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_colon_uni_data <- binomial_prep(colon_genus_tab,
                                     colon_genus_taxa_tab,
                                     colon_metadata,
                                     group, 
                                     usage="ml_clr",
                                     patient = TRUE)
## Removing 5 ASV(s)
# fit the model
enet_model <- glmnet_binomial(filt_colon_uni_data,
                              sample_method = "atypboot",
                              outcome="Group",
                              N=10,
                              clust_var="Patient",
                              reuse=TRUE,
                              file=model_name,
                              Q="Q1_czech")

# ROC
roc_c <- roc_curve(enet_model, group)
## Warning in geom_line(aes(x = `1-specificity`, y = sensitivity, by = name, : Ignoring unknown aesthetics: by
# save the results
models_summ[[model_name]] <- enet_model$model_summary
models_cm[[model_name]] <- enet_model$conf_matrices$original
roc_cs[[model_name]] <- enet_model$kfold_rocobjs
betas[[model_name]] <- as.matrix(enet_model$betas)

# see the results
enet_model$model_summary %>% t()
##                                       [,1]
## alpha                           0.20000000
## lambda                          0.00352282
## auc                             1.00000000
## auc_czech                       1.00000000
## auc_no                                 NaN
## auc_optimism_corrected          0.97314964
## auc_optimism_corrected_CIL      0.95668510
## auc_optimism_corrected_CIU      0.98693210
## accuracy                        1.00000000
## accuracy_czech                         NaN
## accuracy_no                            NaN
## accuracy_optimism_corrected     0.91758825
## accuracy_optimism_corrected_CIL 0.88106563
## accuracy_optimism_corrected_CIU 0.94831081
enet_model$conf_matrices
## $original
##     Predicted
## True   0   1
##    0  95   0
##    1   0 209
## 
## $czech
##     Predicted
## True   0   1
##    0  95   0
##    1   0 209
## 
## $no
## [1] NaN
enet_model$plot
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

roc_c

Saving results

models_summ_df_colon <- do.call(rbind, 
  models_summ[grep(segment,names(models_summ),value = TRUE)])

write.csv(models_summ_df_colon,file.path(path,paste0("elastic_net_",segment,".csv")))

Supplementary models

CLR-transformed data

kNN
model="knn"
ASV level
level="ASV"

pre_ltx vs healthy

group <- c("pre_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_colon_uni_data <- binomial_prep(colon_asv_tab,
                                     colon_taxa_tab,
                                     colon_metadata,
                                     group, usage="ml_clr",
                                     patient = TRUE)
## Removing 1808 ASV(s)
## Removing 13 ASV(s)
# fit the model
knn_model <- knn_binomial(filt_colon_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                              clust_var="Patient",
                              reuse=TRUE,
                              file=model_name,
                              Q="Q1_czech")

# ROC curve
roc_c <- roc_curve(knn_model, group)
## Warning in geom_line(aes(x = `1-specificity`, y = sensitivity, by = name, : Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["knn_model"]][[model_name]] <- knn_model$model_summary
supplements_models[["roc_cs"]][["knn_model"]][[model_name]] <- knn_model$kfold_rocobjs

# see the results
knn_model$model_summary %>% t()
##                                       [,1]
## k                               10.0000000
## auc                              0.9740602
## auc_optimism_corrected           0.9085611
## auc_optimism_corrected_CIL       0.8300600
## auc_optimism_corrected_CIU       0.9923750
## accuracy                         0.9270073
## accuracy_optimism_corrected      0.8742371
## accuracy_optimism_corrected_CIL  0.7918421
## accuracy_optimism_corrected_CIU  0.9439637
roc_c

pre_ltx vs post_ltx

group <- c("pre_ltx","post_ltx")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)


# prepare the data
filt_colon_uni_data <- binomial_prep(colon_asv_tab,
                                     colon_taxa_tab,
                                     colon_metadata,
                                     group, usage="ml_clr",
                                     patient = TRUE)
## Removing 743 ASV(s)
## Removing 13 ASV(s)
# fit the model
knn_model <- knn_binomial(filt_colon_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                          clust_var="Patient",
                              reuse=TRUE,
                              file=model_name,
                              Q="Q1_czech")

# ROC curve
roc_c <- roc_curve(knn_model, group)
## Warning in geom_line(aes(x = `1-specificity`, y = sensitivity, by = name, : Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["knn_model"]][[model_name]] <- knn_model$model_summary
supplements_models[["roc_cs"]][["knn_model"]][[model_name]] <- knn_model$kfold_rocobjs


# see the results
knn_model$model_summary %>% t()
##                                       [,1]
## k                               11.0000000
## auc                              0.9285714
## auc_optimism_corrected           0.7688393
## auc_optimism_corrected_CIL       0.6323093
## auc_optimism_corrected_CIU       0.8526483
## accuracy                         0.8840000
## accuracy_optimism_corrected      0.7978995
## accuracy_optimism_corrected_CIL  0.7454545
## accuracy_optimism_corrected_CIU  0.8586340
roc_c

post_ltx vs healthy

group <- c("post_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_colon_uni_data <- binomial_prep(colon_asv_tab,
                                     colon_taxa_tab,
                                     colon_metadata,
                                     group, usage="ml_clr",
                                     patient = TRUE)
## Removing 232 ASV(s)
## Removing 4 ASV(s)
# fit the model
knn_model <- knn_binomial(filt_colon_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                          clust_var="Patient",
                              reuse=TRUE,
                              file=model_name,
                              Q="Q1_czech")

# ROC curve
roc_c <- roc_curve(knn_model, group)
## Warning in geom_line(aes(x = `1-specificity`, y = sensitivity, by = name, : Ignoring unknown aesthetics: by
# save the results
# save the results
supplements_models[["models_summ"]][["knn_model"]][[model_name]] <- knn_model$model_summary
supplements_models[["roc_cs"]][["knn_model"]][[model_name]] <- knn_model$kfold_rocobjs

# see the results
knn_model$model_summary %>% t()
##                                       [,1]
## k                               30.0000000
## auc                              0.9594383
## auc_optimism_corrected           0.8765328
## auc_optimism_corrected_CIL       0.8292319
## auc_optimism_corrected_CIU       0.9283138
## accuracy                         0.9009901
## accuracy_optimism_corrected      0.7962614
## accuracy_optimism_corrected_CIL  0.7160377
## accuracy_optimism_corrected_CIU  0.8768596
roc_c

Genus level
level="genus"

Aggregate taxa

genus_data <- aggregate_taxa(colon_asv_tab,
                             colon_taxa_tab,
                             taxonomic_level = level)

colon_genus_tab <- genus_data[[1]]
colon_genus_taxa_tab <- genus_data[[2]]

pre_ltx vs healthy

group <- c("pre_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_colon_uni_data <- binomial_prep(colon_genus_tab,
                                     colon_genus_taxa_tab,
                                     colon_metadata,
                                     group, 
                                     usage="ml_clr",
                                     patient = TRUE)
## Removing 148 ASV(s)
# fit the model
knn_model <- knn_binomial(filt_colon_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                          clust_var="Patient",
                              reuse=TRUE,
                              file=model_name,
                              Q="Q1_czech")

# ROC curve
roc_c <- roc_curve(knn_model, group)
## Warning in geom_line(aes(x = `1-specificity`, y = sensitivity, by = name, : Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["knn_model"]][[model_name]] <- knn_model$model_summary
supplements_models[["roc_cs"]][["knn_model"]][[model_name]] <- knn_model$kfold_rocobjs


# see the results
knn_model$model_summary %>% t()
##                                       [,1]
## k                               10.0000000
## auc                              0.9761905
## auc_optimism_corrected           0.8510016
## auc_optimism_corrected_CIL       0.7579809
## auc_optimism_corrected_CIU       0.9430530
## accuracy                         0.8467153
## accuracy_optimism_corrected      0.7932624
## accuracy_optimism_corrected_CIL  0.7231440
## accuracy_optimism_corrected_CIU  0.9186564
roc_c

pre_ltx vs post_ltx

group <- c("pre_ltx","post_ltx")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_colon_uni_data <- binomial_prep(colon_genus_tab,
                                     colon_genus_taxa_tab,
                                     colon_metadata,
                                     group, 
                                     usage="ml_clr",
                                     patient = TRUE)
## Removing 17 ASV(s)
# fit the model
knn_model <- knn_binomial(filt_colon_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                          clust_var="Patient",
                              reuse=TRUE,
                              file=model_name,
                              Q="Q1_czech")

# ROC curve
roc_c <- roc_curve(knn_model, group)
## Warning in geom_line(aes(x = `1-specificity`, y = sensitivity, by = name, : Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["knn_model"]][[model_name]] <- knn_model$model_summary
supplements_models[["roc_cs"]][["knn_model"]][[model_name]] <- knn_model$kfold_rocobjs


# see the results
knn_model$model_summary %>% t()
##                                       [,1]
## k                               10.0000000
## auc                              0.9276601
## auc_optimism_corrected           0.6952586
## auc_optimism_corrected_CIL       0.5157044
## auc_optimism_corrected_CIU       0.8371418
## accuracy                         0.8725100
## accuracy_optimism_corrected      0.8041781
## accuracy_optimism_corrected_CIL  0.7435581
## accuracy_optimism_corrected_CIU  0.8781787
roc_c

post_ltx vs healthy

group <- c("post_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_colon_uni_data <- binomial_prep(colon_genus_tab,
                                     colon_genus_taxa_tab,
                                     colon_metadata,
                                     group, 
                                     usage="ml_clr",
                                     patient = TRUE)
## Removing 5 ASV(s)
# fit the model
knn_model <- knn_binomial(filt_colon_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                          clust_var="Patient",
                              reuse=TRUE,
                              file=model_name,
                              Q="Q1_czech")

# ROC curve
roc_c <- roc_curve(knn_model, group)
## Warning in geom_line(aes(x = `1-specificity`, y = sensitivity, by = name, : Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["knn_model"]][[model_name]] <- knn_model$model_summary
supplements_models[["roc_cs"]][["knn_model"]][[model_name]] <- knn_model$kfold_rocobjs


# see the results
knn_model$model_summary %>% t()
##                                       [,1]
## k                               30.0000000
## auc                              0.9755981
## auc_optimism_corrected           0.8877868
## auc_optimism_corrected_CIL       0.8342668
## auc_optimism_corrected_CIU       0.9326126
## accuracy                         0.8026316
## accuracy_optimism_corrected      0.7431760
## accuracy_optimism_corrected_CIL  0.6842593
## accuracy_optimism_corrected_CIU  0.7972124
roc_c

Random Forest
model="rf"
ASV level
level="ASV"

pre_ltx vs healthy

group <- c("pre_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_colon_uni_data <- binomial_prep(colon_asv_tab,
                                     colon_taxa_tab,
                                     colon_metadata,
                                     group, usage="ml_clr",
                                     patient = TRUE)
## Removing 1808 ASV(s)
## Removing 13 ASV(s)
# fit the model
rf_model <- rf_binomial(filt_colon_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                        clust_var="Patient",
                              reuse=TRUE,
                              file=model_name,
                              Q="Q1_czech")

# ROC curve
roc_c <- roc_curve(rf_model, group)
## Warning in geom_line(aes(x = `1-specificity`, y = sensitivity, by = name, : Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["rf_model"]][[model_name]] <- rf_model$model_summary
supplements_models[["roc_cs"]][["rf_model"]][[model_name]] <- rf_model$kfold_rocobjs

# see the results
rf_model$model_summary %>% t()
##                                 [,1]       
## mtry                            "73"       
## splitrule                       "gini"     
## min.node.size                   "5"        
## auc                             "1"        
## auc_optimism_corrected          "0.9241346"
## auc_optimism_corrected_CIL      "0.8647446"
## auc_optimism_corrected_CIU      "0.9664063"
## accuracy                        "1"        
## accuracy_optimism_corrected     "0.8123775"
## accuracy_optimism_corrected_CIL "0.7444715"
## accuracy_optimism_corrected_CIU "0.9066061"
roc_c

pre_ltx vs post_ltx

group <- c("pre_ltx","post_ltx")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_colon_uni_data <- binomial_prep(colon_asv_tab,
                                     colon_taxa_tab,
                                     colon_metadata,
                                     group, usage="ml_clr",
                                     patient = TRUE)
## Removing 743 ASV(s)
## Removing 13 ASV(s)
# fit the model
rf_model <- rf_binomial(filt_colon_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                        clust_var="Patient",
                              reuse=TRUE,
                              file=model_name,
                              Q="Q1_czech")

# ROC curve
roc_c <- roc_curve(rf_model, group)
## Warning in geom_line(aes(x = `1-specificity`, y = sensitivity, by = name, : Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["rf_model"]][[model_name]] <- rf_model$model_summary
supplements_models[["roc_cs"]][["rf_model"]][[model_name]] <- rf_model$kfold_rocobjs

# see the results
rf_model$model_summary %>% t()
##                                 [,1]       
## mtry                            "225"      
## splitrule                       "gini"     
## min.node.size                   "5"        
## auc                             "1"        
## auc_optimism_corrected          "0.7490965"
## auc_optimism_corrected_CIL      "0.5914776"
## auc_optimism_corrected_CIU      "0.8699399"
## accuracy                        "1"        
## accuracy_optimism_corrected     "0.8050533"
## accuracy_optimism_corrected_CIL "0.7619048"
## accuracy_optimism_corrected_CIU "0.8537056"
roc_c

post_ltx vs healthy

group <- c("post_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_colon_uni_data <- binomial_prep(colon_asv_tab,
                                     colon_taxa_tab,
                                     colon_metadata,
                                     group, usage="ml_clr",
                                     patient = TRUE)
## Removing 232 ASV(s)
## Removing 4 ASV(s)
# fit the model
rf_model <- rf_binomial(filt_colon_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                        clust_var="Patient",
                              reuse=TRUE,
                              file=model_name,
                              Q="Q1_czech")

# ROC curve
roc_c <- roc_curve(rf_model, group)
## Warning in geom_line(aes(x = `1-specificity`, y = sensitivity, by = name, : Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["rf_model"]][[model_name]] <- rf_model$model_summary
supplements_models[["roc_cs"]][["rf_model"]][[model_name]] <- rf_model$kfold_rocobjs

# see the results
rf_model$model_summary %>% t()
##                                 [,1]       
## mtry                            "99"       
## splitrule                       "gini"     
## min.node.size                   "2"        
## auc                             "1"        
## auc_optimism_corrected          "0.9365846"
## auc_optimism_corrected_CIL      "0.8922044"
## auc_optimism_corrected_CIU      "0.980926" 
## accuracy                        "1"        
## accuracy_optimism_corrected     "0.8948373"
## accuracy_optimism_corrected_CIL "0.8345397"
## accuracy_optimism_corrected_CIU "0.9444"
roc_c

Genus level
level="genus"

Aggregate taxa

genus_data <- aggregate_taxa(colon_asv_tab,
                             colon_taxa_tab,
                             taxonomic_level = level)

colon_genus_tab <- genus_data[[1]]
colon_genus_taxa_tab <- genus_data[[2]]

pre_ltx vs healthy

group <- c("pre_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_colon_uni_data <- binomial_prep(colon_genus_tab,
                                     colon_genus_taxa_tab,
                                     colon_metadata,
                                     group, 
                                     usage="ml_clr",
                                     patient = TRUE)
## Removing 148 ASV(s)
# fit the model
rf_model <- rf_binomial(filt_colon_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                        clust_var="Patient",
                              reuse=TRUE,
                              file=model_name,
                              Q="Q1_czech")

# ROC curve
roc_c <- roc_curve(rf_model, group)
## Warning in geom_line(aes(x = `1-specificity`, y = sensitivity, by = name, : Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["rf_model"]][[model_name]] <- rf_model$model_summary
supplements_models[["roc_cs"]][["rf_model"]][[model_name]] <- rf_model$kfold_rocobjs

# see the results
rf_model$model_summary %>% t()
##                                 [,1]       
## mtry                            "163"      
## splitrule                       "gini"     
## min.node.size                   "2"        
## auc                             "1"        
## auc_optimism_corrected          "0.9259073"
## auc_optimism_corrected_CIL      "0.8316926"
## auc_optimism_corrected_CIU      "0.9934783"
## accuracy                        "1"        
## accuracy_optimism_corrected     "0.8508802"
## accuracy_optimism_corrected_CIL "0.7838161"
## accuracy_optimism_corrected_CIU "0.9338652"
roc_c

pre_ltx vs post_ltx

group <- c("pre_ltx","post_ltx")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_colon_uni_data <- binomial_prep(colon_genus_tab,
                                     colon_genus_taxa_tab,
                                     colon_metadata,
                                     group, 
                                     usage="ml_clr",
                                     patient = TRUE)
## Removing 17 ASV(s)
# fit the model
rf_model <- rf_binomial(filt_colon_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                        clust_var="Patient",
                              reuse=TRUE,
                              file=model_name,
                              Q="Q1_czech")

# ROC curve
roc_c <- roc_curve(rf_model, group)
## Warning in geom_line(aes(x = `1-specificity`, y = sensitivity, by = name, : Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["rf_model"]][[model_name]] <- rf_model$model_summary
supplements_models[["roc_cs"]][["rf_model"]][[model_name]] <- rf_model$kfold_rocobjs

# see the results
rf_model$model_summary %>% t()
##                                 [,1]       
## mtry                            "177"      
## splitrule                       "gini"     
## min.node.size                   "5"        
## auc                             "1"        
## auc_optimism_corrected          "0.6897877"
## auc_optimism_corrected_CIL      "0.5414078"
## auc_optimism_corrected_CIU      "0.773423" 
## accuracy                        "1"        
## accuracy_optimism_corrected     "0.8044215"
## accuracy_optimism_corrected_CIL "0.7520434"
## accuracy_optimism_corrected_CIU "0.8841924"
roc_c

post_ltx vs healthy

group <- c("post_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_colon_uni_data <- binomial_prep(colon_genus_tab,
                                     colon_genus_taxa_tab,
                                     colon_metadata,
                                     group, 
                                     usage="ml_clr",
                                     patient = TRUE)
## Removing 5 ASV(s)
# fit the model
rf_model <- rf_binomial(filt_colon_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                        clust_var="Patient",
                              reuse=TRUE,
                              file=model_name,
                              Q="Q1_czech")

# ROC curve
roc_c <- roc_curve(rf_model, group)
## Warning in geom_line(aes(x = `1-specificity`, y = sensitivity, by = name, : Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["rf_model"]][[model_name]] <- rf_model$model_summary
supplements_models[["roc_cs"]][["rf_model"]][[model_name]] <- rf_model$kfold_rocobjs

# see the results
rf_model$model_summary %>% t()
##                                 [,1]       
## mtry                            "17"       
## splitrule                       "gini"     
## min.node.size                   "2"        
## auc                             "1"        
## auc_optimism_corrected          "0.9550928"
## auc_optimism_corrected_CIL      "0.9139744"
## auc_optimism_corrected_CIU      "0.9872226"
## accuracy                        "1"        
## accuracy_optimism_corrected     "0.8980211"
## accuracy_optimism_corrected_CIL "0.860119" 
## accuracy_optimism_corrected_CIU "0.9328664"
roc_c

Gradient boosting
model="gb"
ASV level
level="ASV"

pre_ltx vs healthy

group <- c("pre_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_colon_uni_data <- binomial_prep(colon_asv_tab,
                                     colon_taxa_tab,
                                     colon_metadata,
                                     group, usage="ml_clr",
                                     patient = TRUE)
## Removing 1808 ASV(s)
## Removing 13 ASV(s)
# fit the model
gbm_model <- gbm_binomial(filt_colon_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                          clust_var="Patient",
                              reuse=TRUE,
                              file=model_name,
                              Q="Q1_czech")

# ROC curve
roc_c <- roc_curve(gbm_model, group)
## Warning in geom_line(aes(x = `1-specificity`, y = sensitivity, by = name, : Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["gbm_model"]][[model_name]] <- gbm_model$model_summary
supplements_models[["roc_cs"]][["gbm_model"]][[model_name]] <- gbm_model$kfold_rocobjs

# see the results
gbm_model$model_summary %>% t()
##                                 [,1]       
## mtry                            "17"       
## splitrule                       "gini"     
## min.node.size                   "2"        
## auc                             "1"        
## auc_optimism_corrected          "0.9550928"
## auc_optimism_corrected_CIL      "0.9139744"
## auc_optimism_corrected_CIU      "0.9872226"
## accuracy                        "1"        
## accuracy_optimism_corrected     "0.8980211"
## accuracy_optimism_corrected_CIL "0.860119" 
## accuracy_optimism_corrected_CIU "0.9328664"
roc_c

pre_ltx vs post_ltx

group <- c("pre_ltx","post_ltx")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_colon_uni_data <- binomial_prep(colon_asv_tab,
                                     colon_taxa_tab,
                                     colon_metadata,
                                     group, usage="ml_clr",
                                     patient = TRUE)
## Removing 743 ASV(s)
## Removing 13 ASV(s)
# fit the model
gbm_model <- gbm_binomial(filt_colon_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                          clust_var="Patient",
                              reuse=TRUE,
                              file=model_name,
                              Q="Q1_czech")

# ROC curve
roc_c <- roc_curve(gbm_model, group)
## Warning in geom_line(aes(x = `1-specificity`, y = sensitivity, by = name, : Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["gbm_model"]][[model_name]] <- gbm_model$model_summary
supplements_models[["roc_cs"]][["gbm_model"]][[model_name]] <- gbm_model$kfold_rocobjs


# see the results
gbm_model$model_summary %>% t()
##                                 [,1]       
## mtry                            "17"       
## splitrule                       "gini"     
## min.node.size                   "2"        
## auc                             "1"        
## auc_optimism_corrected          "0.9550928"
## auc_optimism_corrected_CIL      "0.9139744"
## auc_optimism_corrected_CIU      "0.9872226"
## accuracy                        "1"        
## accuracy_optimism_corrected     "0.8980211"
## accuracy_optimism_corrected_CIL "0.860119" 
## accuracy_optimism_corrected_CIU "0.9328664"
roc_c

post_ltx vs healthy

group <- c("post_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_colon_uni_data <- binomial_prep(colon_asv_tab,
                                     colon_taxa_tab,
                                     colon_metadata,
                                     group, usage="ml_clr",
                                     patient = TRUE)
## Removing 232 ASV(s)
## Removing 4 ASV(s)
# fit the model
gbm_model <- gbm_binomial(filt_colon_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                          clust_var="Patient",
                              reuse=TRUE,
                              file=model_name,
                              Q="Q1_czech")

# ROC curve
roc_c <- roc_curve(gbm_model, group)
## Warning in geom_line(aes(x = `1-specificity`, y = sensitivity, by = name, : Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["gbm_model"]][[model_name]] <- gbm_model$model_summary
supplements_models[["roc_cs"]][["gbm_model"]][[model_name]] <- gbm_model$kfold_rocobjs


# see the results
gbm_model$model_summary %>% t()
##                                 [,1]       
## mtry                            "17"       
## splitrule                       "gini"     
## min.node.size                   "2"        
## auc                             "1"        
## auc_optimism_corrected          "0.9550928"
## auc_optimism_corrected_CIL      "0.9139744"
## auc_optimism_corrected_CIU      "0.9872226"
## accuracy                        "1"        
## accuracy_optimism_corrected     "0.8980211"
## accuracy_optimism_corrected_CIL "0.860119" 
## accuracy_optimism_corrected_CIU "0.9328664"
roc_c

Genus level
level="genus"

Aggregate taxa

genus_data <- aggregate_taxa(colon_asv_tab,
                             colon_taxa_tab,
                             taxonomic_level = level)

colon_genus_tab <- genus_data[[1]]
colon_genus_taxa_tab <- genus_data[[2]]

pre_ltx vs healthy

group <- c("pre_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_colon_uni_data <- binomial_prep(colon_genus_tab,
                                     colon_genus_taxa_tab,
                                     colon_metadata,
                                     group, 
                                     usage="ml_clr",
                                     patient = TRUE)
## Removing 148 ASV(s)
# fit the model
gbm_model <- gbm_binomial(filt_colon_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                          clust_var="Patient",
                              reuse=TRUE,
                              file=model_name,
                              Q="Q1_czech")

# ROC curve
roc_c <- roc_curve(gbm_model, group)
## Warning in geom_line(aes(x = `1-specificity`, y = sensitivity, by = name, : Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["gbm_model"]][[model_name]] <- gbm_model$model_summary
supplements_models[["roc_cs"]][["gbm_model"]][[model_name]] <- gbm_model$kfold_rocobjs

# see the results
gbm_model$model_summary %>% t()
##                                 [,1]       
## mtry                            "17"       
## splitrule                       "gini"     
## min.node.size                   "2"        
## auc                             "1"        
## auc_optimism_corrected          "0.9550928"
## auc_optimism_corrected_CIL      "0.9139744"
## auc_optimism_corrected_CIU      "0.9872226"
## accuracy                        "1"        
## accuracy_optimism_corrected     "0.8980211"
## accuracy_optimism_corrected_CIL "0.860119" 
## accuracy_optimism_corrected_CIU "0.9328664"
roc_c

pre_ltx vs post_ltx

group <- c("pre_ltx","post_ltx")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_colon_uni_data <- binomial_prep(colon_genus_tab,
                                     colon_genus_taxa_tab,
                                     colon_metadata,
                                     group, 
                                     usage="ml_clr",
                                     patient = TRUE)
## Removing 17 ASV(s)
# fit the model
gbm_model <- gbm_binomial(filt_colon_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                          clust_var="Patient",
                              reuse=TRUE,
                              file=model_name,
                              Q="Q1_czech")

# ROC curve
roc_c <- roc_curve(gbm_model, group)
## Warning in geom_line(aes(x = `1-specificity`, y = sensitivity, by = name, : Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["gbm_model"]][[model_name]] <- gbm_model$model_summary
supplements_models[["roc_cs"]][["gbm_model"]][[model_name]] <- gbm_model$kfold_rocobjs

# see the results
gbm_model$model_summary %>% t()
##                                 [,1]       
## mtry                            "17"       
## splitrule                       "gini"     
## min.node.size                   "2"        
## auc                             "1"        
## auc_optimism_corrected          "0.9550928"
## auc_optimism_corrected_CIL      "0.9139744"
## auc_optimism_corrected_CIU      "0.9872226"
## accuracy                        "1"        
## accuracy_optimism_corrected     "0.8980211"
## accuracy_optimism_corrected_CIL "0.860119" 
## accuracy_optimism_corrected_CIU "0.9328664"
roc_c

post_ltx vs healthy

group <- c("post_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_colon_uni_data <- binomial_prep(colon_genus_tab,
                                     colon_genus_taxa_tab,
                                     colon_metadata,
                                     group, 
                                     usage="ml_clr",
                                     patient = TRUE)
## Removing 5 ASV(s)
# fit the model
gbm_model <- gbm_binomial(filt_colon_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                          clust_var="Patient",
                              reuse=TRUE,
                              file=model_name,
                              Q="Q1_czech")

# ROC curve
roc_c <- roc_curve(gbm_model, group)
## Warning in geom_line(aes(x = `1-specificity`, y = sensitivity, by = name, : Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["gbm_model"]][[model_name]] <- gbm_model$model_summary
supplements_models[["roc_cs"]][["gbm_model"]][[model_name]] <- gbm_model$kfold_rocobjs

# see the results
gbm_model$model_summary %>% t()
##                                 [,1]       
## mtry                            "17"       
## splitrule                       "gini"     
## min.node.size                   "2"        
## auc                             "1"        
## auc_optimism_corrected          "0.9550928"
## auc_optimism_corrected_CIL      "0.9139744"
## auc_optimism_corrected_CIU      "0.9872226"
## accuracy                        "1"        
## accuracy_optimism_corrected     "0.8980211"
## accuracy_optimism_corrected_CIL "0.860119" 
## accuracy_optimism_corrected_CIU "0.9328664"
roc_c

Relative abundances

Elastic net
ASV level
level="ASV"

pre_ltx vs healthy

group <- c("pre_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_colon_uni_data <- binomial_prep(colon_asv_tab,
                                     colon_taxa_tab,
                                     colon_metadata,
                                     group, usage="ml_ra",
                                     patient = TRUE)
## Removing 1808 ASV(s)
## Removing 13 ASV(s)
# fit the model
enet_model <- glmnet_binomial(filt_colon_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                              clust_var="Patient",
                              reuse=TRUE,
                              file=model_name,
                              Q="Q1_czech")

# ROC curve
roc_c <- roc_curve(enet_model, group)
## Warning in geom_line(aes(x = `1-specificity`, y = sensitivity, by = name, : Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["enet_model_ra"]][[model_name]] <- enet_model$model_summary

supplements_models[["roc_cs"]][["enet_model_ra"]][[model_name]] <- enet_model$kfold_rocobjs

# see the results
enet_model$model_summary %>% t()
##                                       [,1]
## alpha                           0.20000000
## lambda                          0.05188328
## auc                             1.00000000
## auc_czech                       1.00000000
## auc_no                                 NaN
## auc_optimism_corrected          0.89858321
## auc_optimism_corrected_CIL      0.76586912
## auc_optimism_corrected_CIU      0.97379261
## accuracy                        1.00000000
## accuracy_czech                         NaN
## accuracy_no                            NaN
## accuracy_optimism_corrected     0.83973932
## accuracy_optimism_corrected_CIL 0.77165385
## accuracy_optimism_corrected_CIU 0.93386525
roc_c

pre_ltx vs post_ltx

group <- c("pre_ltx","post_ltx")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_colon_uni_data <- binomial_prep(colon_asv_tab,
                                     colon_taxa_tab,
                                     colon_metadata,
                                     group, usage="ml_ra",
                                     patient = TRUE)
## Removing 743 ASV(s)
## Removing 13 ASV(s)
# fit the model
enet_model <- glmnet_binomial(filt_colon_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                              clust_var="Patient",
                              reuse=TRUE,
                              file=model_name,
                              Q="Q1_czech")

# ROC curve
roc_c <- roc_curve(enet_model, group)
## Warning in geom_line(aes(x = `1-specificity`, y = sensitivity, by = name, : Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["enet_model_ra"]][[model_name]] <- enet_model$model_summary

supplements_models[["roc_cs"]][["enet_model_ra"]][[model_name]] <- enet_model$kfold_rocobjs

# see the results
enet_model$model_summary %>% t()
##                                       [,1]
## alpha                           0.60000000
## lambda                          0.01941712
## auc                             1.00000000
## auc_czech                       1.00000000
## auc_no                                 NaN
## auc_optimism_corrected          0.66876106
## auc_optimism_corrected_CIL      0.50000000
## auc_optimism_corrected_CIU      0.82942892
## accuracy                        1.00000000
## accuracy_czech                         NaN
## accuracy_no                            NaN
## accuracy_optimism_corrected     0.80901776
## accuracy_optimism_corrected_CIL 0.76190476
## accuracy_optimism_corrected_CIU 0.86240006
roc_c

post_ltx vs healthy

group <- c("post_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)


# prepare the data
filt_colon_uni_data <- binomial_prep(colon_asv_tab,
                                     colon_taxa_tab,
                                     colon_metadata,
                                     group, usage="ml_ra",
                                     patient = TRUE)
## Removing 232 ASV(s)
## Removing 4 ASV(s)
# fit the model
enet_model <- glmnet_binomial(filt_colon_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                              clust_var="Patient",
                              reuse=TRUE,
                              file=model_name,
                              Q="Q1_czech")

# ROC curve
roc_c <- roc_curve(enet_model, group)
## Warning in geom_line(aes(x = `1-specificity`, y = sensitivity, by = name, : Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["enet_model_ra"]][[model_name]] <- enet_model$model_summary

supplements_models[["roc_cs"]][["enet_model_ra"]][[model_name]] <- enet_model$kfold_rocobjs

# see the results
enet_model$model_summary %>% t()
##                                       [,1]
## alpha                           0.80000000
## lambda                          0.00683854
## auc                             1.00000000
## auc_czech                       1.00000000
## auc_no                                 NaN
## auc_optimism_corrected          0.89770344
## auc_optimism_corrected_CIL      0.82355908
## auc_optimism_corrected_CIU      0.96320425
## accuracy                        1.00000000
## accuracy_czech                         NaN
## accuracy_no                            NaN
## accuracy_optimism_corrected     0.82505124
## accuracy_optimism_corrected_CIL 0.72905487
## accuracy_optimism_corrected_CIU 0.89347623
roc_c

Genus level
level="genus"

Aggregate taxa

genus_data <- aggregate_taxa(colon_asv_tab,
                             colon_taxa_tab,
                             taxonomic_level = level)

colon_genus_tab <- genus_data[[1]]
colon_genus_taxa_tab <- genus_data[[2]]

pre_ltx vs healthy

group <- c("pre_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_colon_uni_data <- binomial_prep(colon_genus_tab,
                                     colon_genus_taxa_tab,
                                     colon_metadata,
                                     group, 
                                     usage="ml_ra",
                                     patient = TRUE)
## Removing 148 ASV(s)
# fit the model
enet_model <- glmnet_binomial(filt_colon_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                              clust_var="Patient",
                              reuse=TRUE,
                              file=model_name,
                              Q="Q1_czech")

# ROC curve
roc_c <- roc_curve(enet_model, group)
## Warning in geom_line(aes(x = `1-specificity`, y = sensitivity, by = name, : Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["enet_model_ra"]][[model_name]] <- enet_model$model_summary

supplements_models[["roc_cs"]][["enet_model_ra"]][[model_name]] <- enet_model$kfold_rocobjs

# see the results
enet_model$model_summary %>% t()
##                                       [,1]
## alpha                           0.20000000
## lambda                          0.04255809
## auc                             1.00000000
## auc_czech                       1.00000000
## auc_no                                 NaN
## auc_optimism_corrected          0.88942146
## auc_optimism_corrected_CIL      0.75637679
## auc_optimism_corrected_CIU      0.98956522
## accuracy                        1.00000000
## accuracy_czech                         NaN
## accuracy_no                            NaN
## accuracy_optimism_corrected     0.83286249
## accuracy_optimism_corrected_CIL 0.75306818
## accuracy_optimism_corrected_CIU 0.91865642
roc_c

pre_ltx vs post_ltx

group <- c("pre_ltx","post_ltx")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)


# prepare the data
filt_colon_uni_data <- binomial_prep(colon_genus_tab,
                                     colon_genus_taxa_tab,
                                     colon_metadata,
                                     group, 
                                     usage="ml_ra",
                                     patient = TRUE)
## Removing 17 ASV(s)
# fit the model
enet_model <- glmnet_binomial(filt_colon_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                              clust_var="Patient",
                              reuse=TRUE,
                              file=model_name,
                              Q="Q1_czech")

# ROC curve
roc_c <- roc_curve(enet_model, group)
## Warning in geom_line(aes(x = `1-specificity`, y = sensitivity, by = name, : Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["enet_model_ra"]][[model_name]] <- enet_model$model_summary

supplements_models[["roc_cs"]][["enet_model_ra"]][[model_name]] <- enet_model$kfold_rocobjs

# see the results
enet_model$model_summary %>% t()
##                                       [,1]
## alpha                           0.00000000
## lambda                          0.08050446
## auc                             1.00000000
## auc_czech                       1.00000000
## auc_no                                 NaN
## auc_optimism_corrected          0.61705894
## auc_optimism_corrected_CIL      0.50044118
## auc_optimism_corrected_CIU      0.77259688
## accuracy                        0.98007968
## accuracy_czech                         NaN
## accuracy_no                            NaN
## accuracy_optimism_corrected     0.78668254
## accuracy_optimism_corrected_CIL 0.74549419
## accuracy_optimism_corrected_CIU 0.83364326
roc_c

post_ltx vs healthy

group <- c("post_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_colon_uni_data <- binomial_prep(colon_genus_tab,
                                     colon_genus_taxa_tab,
                                     colon_metadata,
                                     group, 
                                     usage="ml_ra",
                                     patient = TRUE)
## Removing 5 ASV(s)
# fit the model
enet_model <- glmnet_binomial(filt_colon_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                              clust_var="Patient",
                              reuse=TRUE,
                              file=model_name,
                              Q="Q1_czech")

# ROC curve
roc_c <- roc_curve(enet_model, group)
## Warning in geom_line(aes(x = `1-specificity`, y = sensitivity, by = name, : Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["enet_model_ra"]][[model_name]] <- enet_model$model_summary

supplements_models[["roc_cs"]][["enet_model_ra"]][[model_name]] <- enet_model$kfold_rocobjs

# see the results
enet_model$model_summary %>% t()
##                                        [,1]
## alpha                           0.600000000
## lambda                          0.005060013
## auc                             1.000000000
## auc_czech                       1.000000000
## auc_no                                  NaN
## auc_optimism_corrected          0.908839490
## auc_optimism_corrected_CIL      0.879280481
## auc_optimism_corrected_CIU      0.949067862
## accuracy                        1.000000000
## accuracy_czech                          NaN
## accuracy_no                             NaN
## accuracy_optimism_corrected     0.848866015
## accuracy_optimism_corrected_CIL 0.816775599
## accuracy_optimism_corrected_CIU 0.879510008
roc_c

kNN
ASV level
level="ASV"

pre_ltx vs healthy

group <- c("pre_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_colon_uni_data <- binomial_prep(colon_asv_tab,
                                     colon_taxa_tab,
                                     colon_metadata,
                                     group, usage="ml_ra",
                                     patient = TRUE)
## Removing 1808 ASV(s)
## Removing 13 ASV(s)
# fit the model
knn_model <- knn_binomial(filt_colon_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                          clust_var="Patient",
                              reuse=TRUE,
                              file=model_name,
                              Q="Q1_czech")

# ROC curve
roc_c <- roc_curve(knn_model, group)
## Warning in geom_line(aes(x = `1-specificity`, y = sensitivity, by = name, : Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["knn_model_ra"]][[model_name]] <- knn_model$model_summary

supplements_models[["roc_cs"]][["knn_model_ra"]][[model_name]] <- knn_model$kfold_rocobjs

# see the results
knn_model$model_summary %>% t()
##                                       [,1]
## k                               11.0000000
## auc                              0.9296992
## auc_optimism_corrected           0.6479469
## auc_optimism_corrected_CIL       0.4585905
## auc_optimism_corrected_CIU       0.8331452
## accuracy                         0.7153285
## accuracy_optimism_corrected      0.6797730
## accuracy_optimism_corrected_CIL  0.5847160
## accuracy_optimism_corrected_CIU  0.8562678
roc_c

pre_ltx vs post_ltx

group <- c("pre_ltx","post_ltx")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_colon_uni_data <- binomial_prep(colon_asv_tab,
                                     colon_taxa_tab,
                                     colon_metadata,
                                     group, usage="ml_ra",
                                     patient = TRUE)
## Removing 743 ASV(s)
## Removing 13 ASV(s)
# fit the model
knn_model <- knn_binomial(filt_colon_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                          clust_var="Patient",
                              reuse=TRUE,
                              file=model_name,
                              Q="Q1_czech")

# ROC curve
roc_c <- roc_curve(knn_model, group)
## Warning in geom_line(aes(x = `1-specificity`, y = sensitivity, by = name, : Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["knn_model_ra"]][[model_name]] <- knn_model$model_summary

supplements_models[["roc_cs"]][["knn_model_ra"]][[model_name]] <- knn_model$kfold_rocobjs

# see the results
knn_model$model_summary %>% t()
##                                       [,1]
## k                               30.0000000
## auc                              0.8895375
## auc_optimism_corrected           0.6464599
## auc_optimism_corrected_CIL       0.4859681
## auc_optimism_corrected_CIU       0.7592457
## accuracy                         0.8320000
## accuracy_optimism_corrected      0.8058950
## accuracy_optimism_corrected_CIL  0.7619048
## accuracy_optimism_corrected_CIU  0.8506443
roc_c

post_ltx vs healthy

group <- c("post_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_colon_uni_data <- binomial_prep(colon_asv_tab,
                                     colon_taxa_tab,
                                     colon_metadata,
                                     group, usage="ml_ra",
                                     patient = TRUE)
## Removing 232 ASV(s)
## Removing 4 ASV(s)
# fit the model
knn_model <- knn_binomial(filt_colon_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                          clust_var="Patient",
                              reuse=TRUE,
                              file=model_name,
                              Q="Q1_czech")

# ROC curve
roc_c <- roc_curve(knn_model, group)
## Warning in geom_line(aes(x = `1-specificity`, y = sensitivity, by = name, : Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["knn_model_ra"]][[model_name]] <- knn_model$model_summary

supplements_models[["roc_cs"]][["knn_model_ra"]][[model_name]] <- knn_model$kfold_rocobjs

# see the results
knn_model$model_summary %>% t()
##                                       [,1]
## k                               28.0000000
## auc                              0.8506832
## auc_optimism_corrected           0.7208749
## auc_optimism_corrected_CIL       0.6189545
## auc_optimism_corrected_CIU       0.8390812
## accuracy                         0.7953795
## accuracy_optimism_corrected      0.6946665
## accuracy_optimism_corrected_CIL  0.6099932
## accuracy_optimism_corrected_CIU  0.7814890
roc_c

Genus level
level="genus"

Aggregate taxa

genus_data <- aggregate_taxa(colon_asv_tab,
                             colon_taxa_tab,
                             taxonomic_level = level)

colon_genus_tab <- genus_data[[1]]
colon_genus_taxa_tab <- genus_data[[2]]

pre_ltx vs healthy

group <- c("pre_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_colon_uni_data <- binomial_prep(colon_genus_tab,
                                     colon_genus_taxa_tab,
                                     colon_metadata,
                                     group, 
                                     usage="ml_ra",
                                     patient = TRUE)
## Removing 148 ASV(s)
# fit the model
knn_model <- knn_binomial(filt_colon_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                          clust_var="Patient",
                              reuse=TRUE,
                              file=model_name,
                              Q="Q1_czech")

# ROC curve
roc_c <- roc_curve(knn_model, group)
## Warning in geom_line(aes(x = `1-specificity`, y = sensitivity, by = name, : Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["knn_model_ra"]][[model_name]] <- knn_model$model_summary

supplements_models[["roc_cs"]][["knn_model_ra"]][[model_name]] <- knn_model$kfold_rocobjs

# see the results
knn_model$model_summary %>% t()
##                                       [,1]
## k                               10.0000000
## auc                              0.9432331
## auc_optimism_corrected           0.6025594
## auc_optimism_corrected_CIL       0.4761007
## auc_optimism_corrected_CIU       0.7437072
## accuracy                         0.7226277
## accuracy_optimism_corrected      0.6855019
## accuracy_optimism_corrected_CIL  0.6009375
## accuracy_optimism_corrected_CIU  0.8623397
roc_c

pre_ltx vs post_ltx

group <- c("pre_ltx","post_ltx")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_colon_uni_data <- binomial_prep(colon_genus_tab,
                                     colon_genus_taxa_tab,
                                     colon_metadata,
                                     group, 
                                     usage="ml_ra",
                                     patient = TRUE)
## Removing 17 ASV(s)
# fit the model
knn_model <- knn_binomial(filt_colon_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                          clust_var="Patient",
                              reuse=TRUE,
                              file=model_name,
                              Q="Q1_czech")

# ROC curve
roc_c <- roc_curve(knn_model, group)
## Warning in geom_line(aes(x = `1-specificity`, y = sensitivity, by = name, : Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["knn_model_ra"]][[model_name]] <- knn_model$model_summary

supplements_models[["roc_cs"]][["knn_model_ra"]][[model_name]] <- knn_model$kfold_rocobjs

# see the results
knn_model$model_summary %>% t()
##                                       [,1]
## k                               10.0000000
## auc                              0.9046480
## auc_optimism_corrected           0.6067022
## auc_optimism_corrected_CIL       0.4877012
## auc_optimism_corrected_CIU       0.6966009
## accuracy                         0.8406375
## accuracy_optimism_corrected      0.7874867
## accuracy_optimism_corrected_CIL  0.7561442
## accuracy_optimism_corrected_CIU  0.8230462
roc_c

post_ltx vs healthy

group <- c("post_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_colon_uni_data <- binomial_prep(colon_genus_tab,
                                     colon_genus_taxa_tab,
                                     colon_metadata,
                                     group, 
                                     usage="ml_ra",
                                     patient = TRUE)
## Removing 5 ASV(s)
# fit the model
knn_model <- knn_binomial(filt_colon_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                          clust_var="Patient",
                              reuse=TRUE,
                              file=model_name,
                              Q="Q1_czech")

# ROC curve
roc_c <- roc_curve(knn_model, group)
## Warning in geom_line(aes(x = `1-specificity`, y = sensitivity, by = name, : Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["knn_model_ra"]][[model_name]] <- knn_model$model_summary

supplements_models[["roc_cs"]][["knn_model_ra"]][[model_name]] <- knn_model$kfold_rocobjs

# see the results
knn_model$model_summary %>% t()
##                                       [,1]
## k                               11.0000000
## auc                              0.9049106
## auc_optimism_corrected           0.7556965
## auc_optimism_corrected_CIL       0.6097428
## auc_optimism_corrected_CIU       0.8283564
## accuracy                         0.8618421
## accuracy_optimism_corrected      0.7102348
## accuracy_optimism_corrected_CIL  0.5993561
## accuracy_optimism_corrected_CIU  0.7815629
roc_c

Random Forest
ASV level
level="ASV"

pre_ltx vs healthy

group <- c("pre_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_colon_uni_data <- binomial_prep(colon_asv_tab,
                                     colon_taxa_tab,
                                     colon_metadata,
                                     group, usage="ml_ra",
                                     patient = TRUE)
## Removing 1808 ASV(s)
## Removing 13 ASV(s)
# fit the model
rf_model <- rf_binomial(filt_colon_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                        clust_var="Patient",
                              reuse=TRUE,
                              file=model_name,
                              Q="Q1_czech")

# ROC curve
roc_c <- roc_curve(rf_model, group)
## Warning in geom_line(aes(x = `1-specificity`, y = sensitivity, by = name, : Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["rf_model_ra"]][[model_name]] <- rf_model$model_summary

supplements_models[["roc_cs"]][["rf_model_ra"]][[model_name]] <- rf_model$kfold_rocobjs

# see the results
rf_model$model_summary %>% t()
##                                 [,1]       
## mtry                            "81"       
## splitrule                       "gini"     
## min.node.size                   "5"        
## auc                             "1"        
## auc_optimism_corrected          "0.943333" 
## auc_optimism_corrected_CIL      "0.8778251"
## auc_optimism_corrected_CIU      "0.995"    
## accuracy                        "1"        
## accuracy_optimism_corrected     "0.8572153"
## accuracy_optimism_corrected_CIL "0.7968816"
## accuracy_optimism_corrected_CIU "0.9200926"
roc_c

pre_ltx vs post_ltx

group <- c("pre_ltx","post_ltx")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_colon_uni_data <- binomial_prep(colon_asv_tab,
                                     colon_taxa_tab,
                                     colon_metadata,
                                     group, usage="ml_ra",
                                     patient = TRUE)
## Removing 743 ASV(s)
## Removing 13 ASV(s)
# fit the model
rf_model <- rf_binomial(filt_colon_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                        clust_var="Patient",
                              reuse=TRUE,
                              file=model_name,
                              Q="Q1_czech")

# ROC curve
roc_c <- roc_curve(rf_model, group)
## Warning in geom_line(aes(x = `1-specificity`, y = sensitivity, by = name, : Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["rf_model_ra"]][[model_name]] <- rf_model$model_summary

supplements_models[["roc_cs"]][["rf_model_ra"]][[model_name]] <- rf_model$kfold_rocobjs


# see the results
rf_model$model_summary %>% t()
##                                 [,1]       
## mtry                            "91"       
## splitrule                       "gini"     
## min.node.size                   "5"        
## auc                             "1"        
## auc_optimism_corrected          "0.8150053"
## auc_optimism_corrected_CIL      "0.6932038"
## auc_optimism_corrected_CIU      "0.9355531"
## accuracy                        "1"        
## accuracy_optimism_corrected     "0.8184509"
## accuracy_optimism_corrected_CIL "0.7672619"
## accuracy_optimism_corrected_CIU "0.8647197"
roc_c

post_ltx vs healthy

group <- c("post_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_colon_uni_data <- binomial_prep(colon_asv_tab,
                                     colon_taxa_tab,
                                     colon_metadata,
                                     group, usage="ml_ra",
                                     patient = TRUE)
## Removing 232 ASV(s)
## Removing 4 ASV(s)
# fit the model
rf_model <- rf_binomial(filt_colon_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                        clust_var="Patient",
                              reuse=TRUE,
                              file=model_name,
                              Q="Q1_czech")

# ROC curve
roc_c <- roc_curve(rf_model, group)
## Warning in geom_line(aes(x = `1-specificity`, y = sensitivity, by = name, : Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["rf_model_ra"]][[model_name]] <- rf_model$model_summary

supplements_models[["roc_cs"]][["rf_model_ra"]][[model_name]] <- rf_model$kfold_rocobjs


# see the results
rf_model$model_summary %>% t()
##                                 [,1]       
## mtry                            "23"       
## splitrule                       "gini"     
## min.node.size                   "2"        
## auc                             "1"        
## auc_optimism_corrected          "0.9610293"
## auc_optimism_corrected_CIL      "0.929291" 
## auc_optimism_corrected_CIU      "0.9849327"
## accuracy                        "1"        
## accuracy_optimism_corrected     "0.8973151"
## accuracy_optimism_corrected_CIL "0.8246069"
## accuracy_optimism_corrected_CIU "0.934145"
roc_c

Genus level
level="genus"

Aggregate taxa

genus_data <- aggregate_taxa(colon_asv_tab,
                             colon_taxa_tab,
                             taxonomic_level = level)

colon_genus_tab <- genus_data[[1]]
colon_genus_taxa_tab <- genus_data[[2]]

pre_ltx vs healthy

group <- c("pre_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_colon_uni_data <- binomial_prep(colon_genus_tab,
                                     colon_genus_taxa_tab,
                                     colon_metadata,
                                     group, 
                                     usage="ml_ra",
                                     patient = TRUE)
## Removing 148 ASV(s)
# fit the model
rf_model <- rf_binomial(filt_colon_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                        clust_var="Patient",
                              reuse=TRUE,
                              file=model_name,
                              Q="Q1_czech")

# ROC curve
roc_c <- roc_curve(rf_model, group)
## Warning in geom_line(aes(x = `1-specificity`, y = sensitivity, by = name, : Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["rf_model_ra"]][[model_name]] <- rf_model$model_summary

supplements_models[["roc_cs"]][["rf_model_ra"]][[model_name]] <- rf_model$kfold_rocobjs


# see the results
rf_model$model_summary %>% t()
##                                 [,1]       
## mtry                            "25"       
## splitrule                       "gini"     
## min.node.size                   "2"        
## auc                             "1"        
## auc_optimism_corrected          "0.9377114"
## auc_optimism_corrected_CIL      "0.8565378"
## auc_optimism_corrected_CIU      "0.9941304"
## accuracy                        "1"        
## accuracy_optimism_corrected     "0.8483887"
## accuracy_optimism_corrected_CIL "0.7644926"
## accuracy_optimism_corrected_CIU "0.9396368"
roc_c

pre_ltx vs post_ltx

group <- c("pre_ltx","post_ltx")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_colon_uni_data <- binomial_prep(colon_genus_tab,
                                     colon_genus_taxa_tab,
                                     colon_metadata,
                                     group, 
                                     usage="ml_ra",
                                     patient = TRUE)
## Removing 17 ASV(s)
# fit the model
rf_model <- rf_binomial(filt_colon_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                        clust_var="Patient",
                              reuse=TRUE,
                              file=model_name,
                              Q="Q1_czech")

# ROC curve
roc_c <- roc_curve(rf_model, group)
## Warning in geom_line(aes(x = `1-specificity`, y = sensitivity, by = name, : Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["rf_model_ra"]][[model_name]] <- rf_model$model_summary

supplements_models[["roc_cs"]][["rf_model_ra"]][[model_name]] <- rf_model$kfold_rocobjs


# see the results
rf_model$model_summary %>% t()
##                                 [,1]       
## mtry                            "181"      
## splitrule                       "gini"     
## min.node.size                   "2"        
## auc                             "1"        
## auc_optimism_corrected          "0.7142251"
## auc_optimism_corrected_CIL      "0.5346593"
## auc_optimism_corrected_CIU      "0.7945068"
## accuracy                        "1"        
## accuracy_optimism_corrected     "0.8053349"
## accuracy_optimism_corrected_CIL "0.7583286"
## accuracy_optimism_corrected_CIU "0.8795533"
roc_c

post_ltx vs healthy

group <- c("post_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_colon_uni_data <- binomial_prep(colon_genus_tab,
                                     colon_genus_taxa_tab,
                                     colon_metadata,
                                     group, 
                                     usage="ml_ra",
                                     patient = TRUE)
## Removing 5 ASV(s)
# fit the model
rf_model <- rf_binomial(filt_colon_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                        clust_var="Patient",
                              reuse=TRUE,
                              file=model_name,
                              Q="Q1_czech")

# ROC curve
roc_c <- roc_curve(rf_model, group)
## Warning in geom_line(aes(x = `1-specificity`, y = sensitivity, by = name, : Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["rf_model_ra"]][[model_name]] <- rf_model$model_summary

supplements_models[["roc_cs"]][["rf_model_ra"]][[model_name]] <- rf_model$kfold_rocobjs


# see the results
rf_model$model_summary %>% t()
##                                 [,1]       
## mtry                            "55"       
## splitrule                       "gini"     
## min.node.size                   "2"        
## auc                             "1"        
## auc_optimism_corrected          "0.9587751"
## auc_optimism_corrected_CIL      "0.9118397"
## auc_optimism_corrected_CIU      "0.994497" 
## accuracy                        "1"        
## accuracy_optimism_corrected     "0.8863985"
## accuracy_optimism_corrected_CIL "0.8151464"
## accuracy_optimism_corrected_CIU "0.9170257"
roc_c

Gradient boosting
ASV level
level="ASV"

pre_ltx vs healthy

group <- c("pre_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_colon_uni_data <- binomial_prep(colon_asv_tab,
                                     colon_taxa_tab,
                                     colon_metadata,
                                     group, usage="ml_ra",
                                     patient = TRUE)
## Removing 1808 ASV(s)
## Removing 13 ASV(s)
# fit the model
gbm_model <- gbm_binomial(filt_colon_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                          clust_var="Patient",
                              reuse=TRUE,
                              file=model_name,
                              Q="Q1_czech")

# ROC curve
roc_c <- roc_curve(gbm_model, group)
## Warning in geom_line(aes(x = `1-specificity`, y = sensitivity, by = name, : Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["gbm_model_ra"]][[model_name]] <- gbm_model$model_summary

supplements_models[["roc_cs"]][["gbm_model_ra"]][[model_name]] <- gbm_model$kfold_rocobjs

# see the results
gbm_model$model_summary %>% t()
##                                 [,1]       
## mtry                            "55"       
## splitrule                       "gini"     
## min.node.size                   "2"        
## auc                             "1"        
## auc_optimism_corrected          "0.9587751"
## auc_optimism_corrected_CIL      "0.9118397"
## auc_optimism_corrected_CIU      "0.994497" 
## accuracy                        "1"        
## accuracy_optimism_corrected     "0.8863985"
## accuracy_optimism_corrected_CIL "0.8151464"
## accuracy_optimism_corrected_CIU "0.9170257"
roc_c

pre_ltx vs post_ltx

group <- c("pre_ltx","post_ltx")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_colon_uni_data <- binomial_prep(colon_asv_tab,
                                     colon_taxa_tab,
                                     colon_metadata,
                                     group, usage="ml_ra",
                                     patient = TRUE)
## Removing 743 ASV(s)
## Removing 13 ASV(s)
# fit the model
gbm_model <- gbm_binomial(filt_colon_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                          clust_var="Patient",
                              reuse=TRUE,
                              file=model_name,
                              Q="Q1_czech")

# ROC curve
roc_c <- roc_curve(gbm_model, group)
## Warning in geom_line(aes(x = `1-specificity`, y = sensitivity, by = name, : Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["gbm_model_ra"]][[model_name]] <- gbm_model$model_summary

supplements_models[["roc_cs"]][["gbm_model_ra"]][[model_name]] <- gbm_model$kfold_rocobjs

# see the results
gbm_model$model_summary %>% t()
##                                 [,1]       
## mtry                            "55"       
## splitrule                       "gini"     
## min.node.size                   "2"        
## auc                             "1"        
## auc_optimism_corrected          "0.9587751"
## auc_optimism_corrected_CIL      "0.9118397"
## auc_optimism_corrected_CIU      "0.994497" 
## accuracy                        "1"        
## accuracy_optimism_corrected     "0.8863985"
## accuracy_optimism_corrected_CIL "0.8151464"
## accuracy_optimism_corrected_CIU "0.9170257"
roc_c

post_ltx vs healthy

group <- c("post_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_colon_uni_data <- binomial_prep(colon_asv_tab,
                                     colon_taxa_tab,
                                     colon_metadata,
                                     group, usage="ml_ra",
                                     patient = TRUE)
## Removing 232 ASV(s)
## Removing 4 ASV(s)
# fit the model
gbm_model <- gbm_binomial(filt_colon_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                          clust_var="Patient",
                              reuse=TRUE,
                              file=model_name,
                              Q="Q1_czech")

# ROC curve
roc_c <- roc_curve(gbm_model, group)
## Warning in geom_line(aes(x = `1-specificity`, y = sensitivity, by = name, : Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["gbm_model_ra"]][[model_name]] <- gbm_model$model_summary

supplements_models[["roc_cs"]][["gbm_model_ra"]][[model_name]] <- gbm_model$kfold_rocobjs

# see the results
gbm_model$model_summary %>% t()
##                                 [,1]       
## mtry                            "55"       
## splitrule                       "gini"     
## min.node.size                   "2"        
## auc                             "1"        
## auc_optimism_corrected          "0.9587751"
## auc_optimism_corrected_CIL      "0.9118397"
## auc_optimism_corrected_CIU      "0.994497" 
## accuracy                        "1"        
## accuracy_optimism_corrected     "0.8863985"
## accuracy_optimism_corrected_CIL "0.8151464"
## accuracy_optimism_corrected_CIU "0.9170257"
roc_c

Genus level
level="genus"

Aggregate taxa

genus_data <- aggregate_taxa(colon_asv_tab,
                             colon_taxa_tab,
                             taxonomic_level = level)

colon_genus_tab <- genus_data[[1]]
colon_genus_taxa_tab <- genus_data[[2]]

pre_ltx vs healthy

group <- c("pre_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_colon_uni_data <- binomial_prep(colon_genus_tab,
                                     colon_genus_taxa_tab,
                                     colon_metadata,
                                     group, 
                                     usage="ml_ra",
                                     patient = TRUE)
## Removing 148 ASV(s)
# fit the model
gbm_model <- gbm_binomial(filt_colon_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                          clust_var="Patient",
                              reuse=TRUE,
                              file=model_name,
                              Q="Q1_czech")

# ROC curve
roc_c <- roc_curve(gbm_model, group)
## Warning in geom_line(aes(x = `1-specificity`, y = sensitivity, by = name, : Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["gbm_model_ra"]][[model_name]] <- gbm_model$model_summary

supplements_models[["roc_cs"]][["gbm_model_ra"]][[model_name]] <- gbm_model$kfold_rocobjs

# see the results
gbm_model$model_summary %>% t()
##                                 [,1]       
## mtry                            "55"       
## splitrule                       "gini"     
## min.node.size                   "2"        
## auc                             "1"        
## auc_optimism_corrected          "0.9587751"
## auc_optimism_corrected_CIL      "0.9118397"
## auc_optimism_corrected_CIU      "0.994497" 
## accuracy                        "1"        
## accuracy_optimism_corrected     "0.8863985"
## accuracy_optimism_corrected_CIL "0.8151464"
## accuracy_optimism_corrected_CIU "0.9170257"
roc_c

pre_ltx vs post_ltx

group <- c("pre_ltx","post_ltx")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_colon_uni_data <- binomial_prep(colon_genus_tab,
                                     colon_genus_taxa_tab,
                                     colon_metadata,
                                     group, 
                                     usage="ml_ra",
                                     patient = TRUE)
## Removing 17 ASV(s)
# fit the model
gbm_model <- gbm_binomial(filt_colon_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                          clust_var="Patient",
                              reuse=TRUE,
                              file=model_name,
                              Q="Q1_czech")

# ROC curve
roc_c <- roc_curve(gbm_model, group)
## Warning in geom_line(aes(x = `1-specificity`, y = sensitivity, by = name, : Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["gbm_model_ra"]][[model_name]] <- gbm_model$model_summary

supplements_models[["roc_cs"]][["gbm_model_ra"]][[model_name]] <- gbm_model$kfold_rocobjs

# see the results
gbm_model$model_summary %>% t()
##                                 [,1]       
## mtry                            "55"       
## splitrule                       "gini"     
## min.node.size                   "2"        
## auc                             "1"        
## auc_optimism_corrected          "0.9587751"
## auc_optimism_corrected_CIL      "0.9118397"
## auc_optimism_corrected_CIU      "0.994497" 
## accuracy                        "1"        
## accuracy_optimism_corrected     "0.8863985"
## accuracy_optimism_corrected_CIL "0.8151464"
## accuracy_optimism_corrected_CIU "0.9170257"
roc_c

post_ltx vs healthy

group <- c("post_ltx","healthy")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_colon_uni_data <- binomial_prep(colon_genus_tab,
                                     colon_genus_taxa_tab,
                                     colon_metadata,
                                     group, 
                                     usage="ml_ra",
                                     patient = TRUE)
## Removing 5 ASV(s)
# fit the model
gbm_model <- gbm_binomial(filt_colon_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                          clust_var="Patient",
                              reuse=TRUE,
                              file=model_name,
                              Q="Q1_czech")

# ROC curve
roc_c <- roc_curve(gbm_model, group)
## Warning in geom_line(aes(x = `1-specificity`, y = sensitivity, by = name, : Ignoring unknown aesthetics: by
# save the results
supplements_models[["models_summ"]][["gbm_model_ra"]][[model_name]] <- gbm_model$model_summary

supplements_models[["roc_cs"]][["gbm_model_ra"]][[model_name]] <- gbm_model$kfold_rocobjs

# see the results
gbm_model$model_summary %>% t()
##                                 [,1]       
## mtry                            "55"       
## splitrule                       "gini"     
## min.node.size                   "2"        
## auc                             "1"        
## auc_optimism_corrected          "0.9587751"
## auc_optimism_corrected_CIL      "0.9118397"
## auc_optimism_corrected_CIU      "0.994497" 
## accuracy                        "1"        
## accuracy_optimism_corrected     "0.8863985"
## accuracy_optimism_corrected_CIL "0.8151464"
## accuracy_optimism_corrected_CIU "0.9170257"
roc_c

Saving results

models_list <- list()

for (model_name in names(supplements_models$models_summ)){
  df <- do.call(rbind, supplements_models$models_summ[[model_name]])
  models_list[[model_name]] <- df
}

write.xlsx(models_list,
           file=file.path(path,paste0("supplements_models_",segment,".xlsx")),
           rowNames=TRUE)

Results overview

Alpha diversity

pc_observed[[segment]]
pc_shannon[[segment]]
pc_simpson[[segment]]
pc_pielou[[segment]]

Plots

alpha_div_plots[[paste(segment,"Country")]]
## NULL
alpha_div_plots[[paste(segment,"Custom")]]

Beta diversity

Main results

pairwise_aitchison_raw[[paste("genus", segment)]]

PCA

pca_plots_list[[paste(segment,"genus custom")]]

Supplements

knitr::kable(supplements_beta[!grepl("PCoA",names(supplements_beta))],
             digits = 3,
             caption = "Supplementary PERMANOVA results")
Supplementary PERMANOVA results
pairs Df SumsOfSqs F.Model R2 p.value p.adjusted sig
pre_ltx vs healthy 1 0.412 2.198 0.036 0.008 0.012 *
pre_ltx vs post_ltx 1 0.230 1.052 0.008 0.350 0.350
post_ltx vs healthy 1 0.771 3.752 0.027 0.001 0.003 **
pairs Df SumsOfSqs F.Model R2 p.value p.adjusted sig
pre_ltx vs healthy 1 0.499 1.835 0.030 0.005 0.007 **
pre_ltx vs post_ltx 1 0.333 1.101 0.009 0.262 0.262
post_ltx vs healthy 1 0.865 2.980 0.021 0.001 0.003 **
pairs Df SumsOfSqs F.Model R2 p.value p.adjusted sig
pre_ltx vs healthy 1 484.192 1.552 0.026 0.002 0.003 **
pre_ltx vs post_ltx 1 320.395 1.130 0.009 0.121 0.121
post_ltx vs healthy 1 877.994 2.987 0.021 0.001 0.003 **
pairs Df SumsOfSqs F.Model R2 p.value p.adjusted sig
pre_ltx vs healthy 1 0.566 1.748 0.029 0.007 0.011 *
pre_ltx vs post_ltx 1 0.437 1.284 0.010 0.113 0.113
post_ltx vs healthy 1 1.212 3.689 0.026 0.001 0.003 **
pairs Df SumsOfSqs F.Model R2 p.value p.adjusted sig
pre_ltx vs healthy 1 0.558 1.423 0.024 0.008 0.012 *
pre_ltx vs post_ltx 1 0.474 1.174 0.009 0.102 0.102
post_ltx vs healthy 1 1.027 2.597 0.019 0.001 0.003 **
pairs Df SumsOfSqs F.Model R2 p.value p.adjusted sig
pre_ltx vs healthy 1 0.668 3.662 0.026 0.053 0.053
pre_ltx vs post_ltx 1 0.700 3.489 0.014 0.014 0.021 *
post_ltx vs healthy 1 1.306 7.046 0.023 0.001 0.003 **
pairs Df SumsOfSqs F.Model R2 p.value p.adjusted sig
pre_ltx vs healthy 1 0.752 2.816 0.020 0.067 0.067
pre_ltx vs post_ltx 1 0.830 2.894 0.011 0.022 0.033 *
post_ltx vs healthy 1 1.461 5.331 0.017 0.001 0.003 **
pairs Df SumsOfSqs F.Model R2 p.value p.adjusted sig
pre_ltx vs healthy 1 453.896 1.573 0.012 0.779 0.779
pre_ltx vs post_ltx 1 515.107 1.889 0.008 0.323 0.485
post_ltx vs healthy 1 1332.113 4.762 0.016 0.001 0.003 **
pairs Df SumsOfSqs F.Model R2 p.value p.adjusted sig
pre_ltx vs healthy 1 0.703 2.227 0.016 0.247 0.247
pre_ltx vs post_ltx 1 0.940 2.914 0.012 0.022 0.033 *
post_ltx vs healthy 1 2.080 6.653 0.022 0.001 0.003 **
pairs Df SumsOfSqs F.Model R2 p.value p.adjusted sig
pre_ltx vs healthy 1 0.681 1.767 0.013 0.632 0.632
pre_ltx vs post_ltx 1 0.888 2.272 0.009 0.073 0.109
post_ltx vs healthy 1 1.733 4.508 0.015 0.001 0.003 **

PCA

plot_list <- supplements_beta[grepl("PCoA",names(supplements_beta)) &
                              grepl(segment,names(supplements_beta))]

ggarrange(plotlist = plot_list,
          labels=names(plot_list),
          font.label = list(size=5,face="plain"),
          ncol=2,nrow=3)

Univariate analysis

Number of significant taxa

knitr::kable(cbind(as.data.frame(lapply(list_intersections,nrow)),
      as.data.frame(lapply(psc_effect,nrow))) %>% t() %>% 
  `colnames<-`("Count") %>% 
  `rownames<-`(c(names(list_intersections),"PSC effect ASV","PSC effect Genus","PSC effect Phylum")),caption="Number of significant taxa")
Number of significant taxa
Count
colon genus healthy vs pre_ltx 33
colon genus pre_ltx vs post_ltx 10
colon genus healthy vs post_ltx 46
colon ASV healthy vs pre_ltx 84
colon ASV pre_ltx vs post_ltx 6
colon ASV healthy vs post_ltx 105
colon phylum healthy vs pre_ltx 2
colon phylum pre_ltx vs post_ltx 3
colon phylum healthy vs post_ltx 5
PSC effect ASV 20
PSC effect Genus 52
PSC effect Phylum 2

Counts

# univar_list <- univariate_statistics(list_intersections,
#                                      psc_effect,
#                                      colon_genus_asv_taxa_tab)
# 
# univar_df <- univar_list[[1]]
# wb <- univar_list[[2]]
# 
# # save the results
# saveWorkbook(wb,"results/Q1/DAA_final_terminal_colon.xlsx", overwrite = TRUE)
# 
# # see the results
# univar_df

Machine learning

Main models

Summary

knitr::kable(models_summ_df_colon %>% dplyr::select(
"alpha","lambda",
"auc_optimism_corrected",
"auc_optimism_corrected_CIL",
"auc_optimism_corrected_CIU"),
             digits=3,caption="Elastic net results")
Elastic net results
alpha lambda auc_optimism_corrected auc_optimism_corrected_CIL auc_optimism_corrected_CIU
pre_ltx vs healthy ASV colon 0.2 0.077 0.925 0.836 0.980
pre_ltx vs post_ltx ASV colon 1.0 0.008 0.846 0.733 0.915
post_ltx vs healthy ASV colon 0.4 0.008 0.976 0.959 0.990
pre_ltx vs healthy genus colon 1.0 0.014 0.941 0.900 0.989
pre_ltx vs post_ltx genus colon 0.0 0.049 0.755 0.542 0.839
post_ltx vs healthy genus colon 0.2 0.004 0.973 0.957 0.987

ROC - ASV level

roc_curve_all_custom(roc_cs[c(7:9)], 
                     Q="Q1_czech",
                     model_name="enet_model")
## [1] "pre_ltx vs healthy ASV colon"  "pre_ltx vs post_ltx ASV colon" "post_ltx vs healthy ASV colon"
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values

ROC - Genus level

roc_curve_all_custom(roc_cs[c(10:12)],Q="Q1_czech",
                     model_name="enet_model")
## [1] "pre_ltx vs healthy genus colon"  "pre_ltx vs post_ltx genus colon" "post_ltx vs healthy genus colon"
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm): collapsing to unique 'x' values

Supplementary models

Summary

# Build final dataframe
models_list[["enet_model"]] <- rbind(models_summ_df_colon,models_summ_df_colon)
final_df <- tibble(row_names = rownames(models_list[[1]]))

# Loop through models and extract required values
for (model_name in names(models_list)) {
  model_df <- models_list[[model_name]]
  
  # Combine AUC_optimism_corrected with its CI values
  final_df[[model_name]] <- paste0(
    round(model_df$auc_optimism_corrected, 3), 
    " (", round(model_df$auc_optimism_corrected_CIL, 3), "; ", 
    round(model_df$auc_optimism_corrected_CIU, 3), ")"
  )
}

knitr::kable(final_df, caption="All models")
All models
row_names knn_model rf_model gbm_model enet_model_ra knn_model_ra rf_model_ra gbm_model_ra enet_model
pre_ltx vs healthy ASV terminal_ileum 0.911 (0.846; 0.967) 0.829 (0.577; 0.984) 0.853 (0.72; 0.927) 0.845 (0.706; 0.944) 0.699 (0.52; 0.874) 0.917 (0.778; 0.984) 0.869 (0.783; 0.942) 0.925 (0.836; 0.98)
pre_ltx vs post_ltx ASV terminal_ileum 0.677 (0.585; 0.787) 0.656 (0.578; 0.735) 0.853 (0.72; 0.927) 0.565 (0.372; 0.822) 0.667 (0.574; 0.83) 0.794 (0.656; 0.887) 0.869 (0.783; 0.942) 0.846 (0.733; 0.915)
post_ltx vs healthy ASV terminal_ileum 0.818 (0.705; 0.952) 0.865 (0.747; 0.948) 0.853 (0.72; 0.927) 0.795 (0.708; 0.869) 0.704 (0.601; 0.791) 0.923 (0.852; 0.966) 0.869 (0.783; 0.942) 0.976 (0.959; 0.99)
pre_ltx vs healthy genus terminal_ileum 0.858 (0.791; 0.971) 0.882 (0.769; 0.963) 0.853 (0.72; 0.927) 0.851 (0.744; 0.983) 0.672 (0.456; 0.962) 0.922 (0.851; 0.976) 0.869 (0.783; 0.942) 0.941 (0.9; 0.989)
pre_ltx vs post_ltx genus terminal_ileum 0.693 (0.559; 0.805) 0.708 (0.503; 0.863) 0.853 (0.72; 0.927) 0.608 (0.492; 0.74) 0.597 (0.468; 0.69) 0.749 (0.634; 0.845) 0.869 (0.783; 0.942) 0.755 (0.542; 0.839)
post_ltx vs healthy genus terminal_ileum 0.892 (0.841; 0.962) 0.853 (0.72; 0.927) 0.853 (0.72; 0.927) 0.781 (0.547; 0.871) 0.71 (0.571; 0.796) 0.869 (0.783; 0.942) 0.869 (0.783; 0.942) 0.973 (0.957; 0.987)
pre_ltx vs healthy ASV colon 0.909 (0.83; 0.992) 0.924 (0.865; 0.966) 0.955 (0.914; 0.987) 0.899 (0.766; 0.974) 0.648 (0.459; 0.833) 0.943 (0.878; 0.995) 0.959 (0.912; 0.994) 0.925 (0.836; 0.98)
pre_ltx vs post_ltx ASV colon 0.769 (0.632; 0.853) 0.749 (0.591; 0.87) 0.955 (0.914; 0.987) 0.669 (0.5; 0.829) 0.646 (0.486; 0.759) 0.815 (0.693; 0.936) 0.959 (0.912; 0.994) 0.846 (0.733; 0.915)
post_ltx vs healthy ASV colon 0.877 (0.829; 0.928) 0.937 (0.892; 0.981) 0.955 (0.914; 0.987) 0.898 (0.824; 0.963) 0.721 (0.619; 0.839) 0.961 (0.929; 0.985) 0.959 (0.912; 0.994) 0.976 (0.959; 0.99)
pre_ltx vs healthy genus colon 0.851 (0.758; 0.943) 0.926 (0.832; 0.993) 0.955 (0.914; 0.987) 0.889 (0.756; 0.99) 0.603 (0.476; 0.744) 0.938 (0.857; 0.994) 0.959 (0.912; 0.994) 0.941 (0.9; 0.989)
pre_ltx vs post_ltx genus colon 0.695 (0.516; 0.837) 0.69 (0.541; 0.773) 0.955 (0.914; 0.987) 0.617 (0.5; 0.773) 0.607 (0.488; 0.697) 0.714 (0.535; 0.795) 0.959 (0.912; 0.994) 0.755 (0.542; 0.839)
post_ltx vs healthy genus colon 0.888 (0.834; 0.933) 0.955 (0.914; 0.987) 0.955 (0.914; 0.987) 0.909 (0.879; 0.949) 0.756 (0.61; 0.828) 0.959 (0.912; 0.994) 0.959 (0.912; 0.994) 0.973 (0.957; 0.987)
write.csv(final_df,file=file.path(path,"AUC_all_models.csv"),row.names = FALSE)

ROC - ASV

rocs_list <- supplements_models$roc_cs
rocs_list[["enet_model"]] <- roc_cs

plot_list <- list()

for (model_name in names(rocs_list)) {
  plot_list[[model_name]] <- roc_curve_all_custom(rocs_list[[model_name]][c(1:3)],
                       Q="Q1_czech",
                       model_name=model_name)
}

ggarrange(plotlist = plot_list,labels = names(rocs_list),font.label = list(face="plain",size=7))

ROC - genus

plot_list <- list()

for (model_name in names(rocs_list)) {
  plot_list[[model_name]] <- roc_curve_all_custom(rocs_list[[model_name]][c(4:6)],
                       Q="Q1_czech",
                       model_name=model_name)
}

ggarrange(plotlist = plot_list,labels = names(rocs_list),font.label = list(face="plain",size=7))